Skip to content

Commit db98ac4

Browse files
authored
[LV] Use shl for ((VF * Step) * vscale) in createStepForVF. (#153495)
Directly emit shl instead of a multiply if VF * Step is a power-of-2. The main motivation here is to prepare the code and tests for directly generating and expanding a SCEV expression of the minimum iteration count. SCEVExpander will directly emit shl for multiplies by powers-of-2. InstCombine also performs this combine, so end-to-end this should effectively be NFC. PR: #153495
1 parent 98728d9 commit db98ac4

File tree

78 files changed

+610
-600
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

78 files changed

+610
-600
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -824,7 +824,14 @@ namespace llvm {
824824
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
825825
int64_t Step) {
826826
assert(Ty->isIntegerTy() && "Expected an integer step");
827-
return B.CreateElementCount(Ty, VF.multiplyCoefficientBy(Step));
827+
ElementCount VFxStep = VF.multiplyCoefficientBy(Step);
828+
assert(isPowerOf2_64(VF.getKnownMinValue()) && "must pass power-of-2 VF");
829+
if (VF.isScalable() && isPowerOf2_64(Step)) {
830+
return B.CreateShl(
831+
B.CreateVScale(Ty),
832+
ConstantInt::get(Ty, Log2_64(VFxStep.getKnownMinValue())), "", true);
833+
}
834+
return B.CreateElementCount(Ty, VFxStep);
828835
}
829836

830837
/// Return the runtime value for VF.
@@ -2298,7 +2305,10 @@ Value *InnerLoopVectorizer::createIterationCountCheck(ElementCount VF,
22982305
// Reuse existing vector loop preheader for TC checks.
22992306
// Note that new preheader block is generated for vector loop.
23002307
BasicBlock *const TCCheckBlock = LoopVectorPreHeader;
2301-
IRBuilder<> Builder(TCCheckBlock->getTerminator());
2308+
IRBuilder<InstSimplifyFolder> Builder(
2309+
TCCheckBlock->getContext(),
2310+
InstSimplifyFolder(TCCheckBlock->getDataLayout()));
2311+
Builder.SetInsertPoint(TCCheckBlock->getTerminator());
23022312

23032313
// If tail is to be folded, vector loop takes care of all iterations.
23042314
Value *Count = getTripCount();
@@ -2310,7 +2320,7 @@ Value *InnerLoopVectorizer::createIterationCountCheck(ElementCount VF,
23102320
return createStepForVF(Builder, CountTy, VF, UF);
23112321

23122322
Value *MinProfTC =
2313-
createStepForVF(Builder, CountTy, MinProfitableTripCount, 1);
2323+
Builder.CreateElementCount(CountTy, MinProfitableTripCount);
23142324
if (!VF.isScalable())
23152325
return MinProfTC;
23162326
return Builder.CreateBinaryIntrinsic(

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -702,7 +702,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
702702
; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
703703
; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
704704
; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
705-
; PRED-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
705+
; PRED-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 1
706706
; PRED-NEXT: [[TMP8:%.*]] = sub i64 257, [[TMP7]]
707707
; PRED-NEXT: [[TMP9:%.*]] = icmp ugt i64 257, [[TMP7]]
708708
; PRED-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i64 [[TMP8]], i64 0

llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
77
; CHECK-SAME: ptr [[DST:%.*]], i32 [[X:%.*]], i64 [[M:%.*]], i64 [[CONV6:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
88
; CHECK-NEXT: [[ENTRY:.*]]:
99
; CHECK-NEXT: [[CONV61:%.*]] = zext i32 [[X]] to i64
10-
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
11-
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
10+
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
11+
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP10]], 2
1212
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP1]])
1313
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
1414
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
@@ -37,7 +37,7 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
3737
; CHECK-NEXT: [[TMP32:%.*]] = sext i32 [[TMP30]] to i64
3838
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP32]]
3939
; CHECK-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64()
40-
; CHECK-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], 2
40+
; CHECK-NEXT: [[TMP38:%.*]] = shl nuw i64 [[TMP37]], 1
4141
; CHECK-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP34]], i64 [[TMP38]]
4242
; CHECK-NEXT: store <vscale x 2 x double> zeroinitializer, ptr [[TMP34]], align 8
4343
; CHECK-NEXT: store <vscale x 2 x double> zeroinitializer, ptr [[TMP39]], align 8
@@ -109,7 +109,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
109109
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
110110
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
111111
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
112-
; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 2
112+
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 1
113113
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[N]], [[TMP11]]
114114
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[N]], [[TMP11]]
115115
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
@@ -225,7 +225,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
225225
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
226226
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
227227
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
228-
; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 2
228+
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 1
229229
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP0]], [[TMP11]]
230230
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[TMP0]], [[TMP11]]
231231
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0

llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ define void @f1(ptr %A) #0 {
1010
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
1111
; CHECK-NEXT: entry:
1212
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
13-
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
13+
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
1414
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1515
; CHECK: vector.ph:
1616
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()

llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur
7272
; SVE: for.body.preheader:
7373
; SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64
7474
; SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
75-
; SVE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
75+
; SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
7676
; SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
7777
; SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
7878
; SVE: vector.ph:

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
1313
; DEFAULT-NEXT: [[DST1:%.*]] = ptrtoint ptr [[DST]] to i64
1414
; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
1515
; DEFAULT-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
16-
; DEFAULT-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 16
16+
; DEFAULT-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 4
1717
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
1818
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
1919
; DEFAULT: [[VECTOR_MEMCHECK]]:
@@ -36,7 +36,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
3636
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
3737
; DEFAULT-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDEX]]
3838
; DEFAULT-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
39-
; DEFAULT-NEXT: [[TMP24:%.*]] = mul nuw i64 [[TMP23]], 8
39+
; DEFAULT-NEXT: [[TMP24:%.*]] = shl nuw i64 [[TMP23]], 3
4040
; DEFAULT-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[TMP20]], i64 [[TMP24]]
4141
; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP20]], align 1
4242
; DEFAULT-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x i8>, ptr [[TMP25]], align 1
@@ -54,7 +54,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
5454
; DEFAULT-NEXT: [[TMP37:%.*]] = trunc <vscale x 8 x i16> [[TMP35]] to <vscale x 8 x i8>
5555
; DEFAULT-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
5656
; DEFAULT-NEXT: [[TMP41:%.*]] = call i64 @llvm.vscale.i64()
57-
; DEFAULT-NEXT: [[TMP42:%.*]] = mul nuw i64 [[TMP41]], 8
57+
; DEFAULT-NEXT: [[TMP42:%.*]] = shl nuw i64 [[TMP41]], 3
5858
; DEFAULT-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP38]], i64 [[TMP42]]
5959
; DEFAULT-NEXT: store <vscale x 8 x i8> [[TMP36]], ptr [[TMP38]], align 1
6060
; DEFAULT-NEXT: store <vscale x 8 x i8> [[TMP37]], ptr [[TMP43]], align 1
@@ -104,7 +104,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
104104
; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[X]], i64 0
105105
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
106106
; PRED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
107-
; PRED-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 16
107+
; PRED-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 4
108108
; PRED-NEXT: [[TMP13:%.*]] = sub i64 [[TMP0]], [[TMP12]]
109109
; PRED-NEXT: [[TMP14:%.*]] = icmp ugt i64 [[TMP0]], [[TMP12]]
110110
; PRED-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i64 [[TMP13]], i64 0

llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3
1111
; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
1212
; CHECK-NEXT: [[ITER_CHECK:.*]]:
1313
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
14-
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
14+
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
1515
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 17, [[TMP1]]
1616
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
1717
; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
@@ -43,7 +43,7 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3
4343
; CHECK-NEXT: br label %[[VEC_EPILOG_ITER_CHECK:.*]]
4444
; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
4545
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
46-
; CHECK-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 2
46+
; CHECK-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 1
4747
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ule i64 1, [[TMP15]]
4848
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]]
4949
; CHECK: [[VEC_EPILOG_PH]]:
@@ -147,7 +147,7 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no
147147
; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
148148
; CHECK-NEXT: [[ITER_CHECK:.*]]:
149149
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
150-
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
150+
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
151151
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 17, [[TMP1]]
152152
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
153153
; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
@@ -179,7 +179,7 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no
179179
; CHECK-NEXT: br label %[[VEC_EPILOG_ITER_CHECK:.*]]
180180
; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
181181
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
182-
; CHECK-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 2
182+
; CHECK-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 1
183183
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ule i64 1, [[TMP15]]
184184
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]]
185185
; CHECK: [[VEC_EPILOG_PH]]:

llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) {
137137
; INTERLEAVE-4-VLA-LABEL: @interleave_integer_reduction(
138138
; INTERLEAVE-4-VLA-NEXT: entry:
139139
; INTERLEAVE-4-VLA-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
140-
; INTERLEAVE-4-VLA-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
140+
; INTERLEAVE-4-VLA-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
141141
; INTERLEAVE-4-VLA-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
142142
; INTERLEAVE-4-VLA-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
143143
; INTERLEAVE-4-VLA: vector.ph:
@@ -154,10 +154,10 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) {
154154
; INTERLEAVE-4-VLA-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
155155
; INTERLEAVE-4-VLA-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]]
156156
; INTERLEAVE-4-VLA-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
157-
; INTERLEAVE-4-VLA-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
157+
; INTERLEAVE-4-VLA-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
158158
; INTERLEAVE-4-VLA-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[TMP8]]
159159
; INTERLEAVE-4-VLA-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
160-
; INTERLEAVE-4-VLA-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 8
160+
; INTERLEAVE-4-VLA-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 3
161161
; INTERLEAVE-4-VLA-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[TMP11]]
162162
; INTERLEAVE-4-VLA-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
163163
; INTERLEAVE-4-VLA-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 12

llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
4949
; CHECK-VS1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
5050
; CHECK-VS1-NEXT: [[TMP3:%.*]] = sub i64 20, [[TMP2]]
5151
; CHECK-VS1-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
52-
; CHECK-VS1-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
52+
; CHECK-VS1-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 3
5353
; CHECK-VS1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP5]]
5454
; CHECK-VS1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
5555
; CHECK-VS1: [[VECTOR_SCEVCHECK]]:
@@ -64,7 +64,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
6464
; CHECK-VS1-NEXT: br i1 [[TMP13]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
6565
; CHECK-VS1: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
6666
; CHECK-VS1-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
67-
; CHECK-VS1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 16
67+
; CHECK-VS1-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 4
6868
; CHECK-VS1-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP3]], [[TMP15]]
6969
; CHECK-VS1-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
7070
; CHECK-VS1: [[VECTOR_PH]]:
@@ -92,7 +92,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
9292
; CHECK-VS1-NEXT: [[IND_END4:%.*]] = add i64 [[TMP0]], [[N_VEC]]
9393
; CHECK-VS1-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP3]], [[N_VEC]]
9494
; CHECK-VS1-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
95-
; CHECK-VS1-NEXT: [[TMP27:%.*]] = mul nuw i64 [[TMP26]], 8
95+
; CHECK-VS1-NEXT: [[TMP27:%.*]] = shl nuw i64 [[TMP26]], 3
9696
; CHECK-VS1-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP27]]
9797
; CHECK-VS1-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]]
9898
; CHECK-VS1: [[VEC_EPILOG_PH]]:
@@ -149,7 +149,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
149149
; CHECK-VS2-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
150150
; CHECK-VS2-NEXT: [[TMP3:%.*]] = sub i64 20, [[TMP2]]
151151
; CHECK-VS2-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
152-
; CHECK-VS2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
152+
; CHECK-VS2-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2
153153
; CHECK-VS2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP5]]
154154
; CHECK-VS2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
155155
; CHECK-VS2: [[VECTOR_SCEVCHECK]]:
@@ -164,7 +164,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
164164
; CHECK-VS2-NEXT: br i1 [[TMP13]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
165165
; CHECK-VS2: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
166166
; CHECK-VS2-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
167-
; CHECK-VS2-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 8
167+
; CHECK-VS2-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 3
168168
; CHECK-VS2-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP3]], [[TMP15]]
169169
; CHECK-VS2-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
170170
; CHECK-VS2: [[VECTOR_PH]]:
@@ -192,7 +192,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
192192
; CHECK-VS2-NEXT: [[IND_END4:%.*]] = add i64 [[TMP0]], [[N_VEC]]
193193
; CHECK-VS2-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP3]], [[N_VEC]]
194194
; CHECK-VS2-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
195-
; CHECK-VS2-NEXT: [[TMP27:%.*]] = mul nuw i64 [[TMP26]], 4
195+
; CHECK-VS2-NEXT: [[TMP27:%.*]] = shl nuw i64 [[TMP26]], 2
196196
; CHECK-VS2-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP27]]
197197
; CHECK-VS2-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]]
198198
; CHECK-VS2: [[VEC_EPILOG_PH]]:

0 commit comments

Comments
 (0)