Skip to content

Commit a034d69

Browse files
hassnaaHamdigithub-actions[bot]
authored andcommitted
Automerge: [LV]: Skip Epilogue scalable VF greater than RemainingIterations. (#156724)
Consider skipping epilogue scalable VF when they are greater than RemainingIterations same as fixed VF. And skip scalable RemainingIterations from that comparison because SCEV ATM can't evaluate non-canonical vscale-based expressions.
2 parents 114cfd2 + f7f4135 commit a034d69

File tree

3 files changed

+63
-153
lines changed

3 files changed

+63
-153
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4428,11 +4428,13 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
44284428
assert(!isa<SCEVCouldNotCompute>(TC) && "Trip count SCEV must be computable");
44294429
const SCEV *KnownMinTC;
44304430
bool ScalableTC = match(TC, m_scev_c_Mul(m_SCEV(KnownMinTC), m_SCEVVScale()));
4431+
bool ScalableRemIter = false;
44314432
// Use versions of TC and VF in which both are either scalable or fixed.
4432-
if (ScalableTC == MainLoopVF.isScalable())
4433+
if (ScalableTC == MainLoopVF.isScalable()) {
4434+
ScalableRemIter = ScalableTC;
44334435
RemainingIterations =
44344436
SE.getURemExpr(TC, SE.getElementCount(TCType, MainLoopVF * IC));
4435-
else if (ScalableTC) {
4437+
} else if (ScalableTC) {
44364438
const SCEV *EstimatedTC = SE.getMulExpr(
44374439
KnownMinTC,
44384440
SE.getConstant(TCType, CM.getVScaleForTuning().value_or(1)));
@@ -4456,6 +4458,9 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
44564458
<< MaxTripCount << "\n");
44574459
}
44584460

4461+
auto SkipVF = [&](const SCEV *VF, const SCEV *RemIter) -> bool {
4462+
return SE.isKnownPredicate(CmpInst::ICMP_UGT, VF, RemIter);
4463+
};
44594464
for (auto &NextVF : ProfitableVFs) {
44604465
// Skip candidate VFs without a corresponding VPlan.
44614466
if (!hasPlanWithVF(NextVF.Width))
@@ -4473,11 +4478,17 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
44734478

44744479
// If NextVF is greater than the number of remaining iterations, the
44754480
// epilogue loop would be dead. Skip such factors.
4476-
if (RemainingIterations && !NextVF.Width.isScalable()) {
4477-
if (SE.isKnownPredicate(
4478-
CmpInst::ICMP_UGT,
4479-
SE.getConstant(TCType, NextVF.Width.getFixedValue()),
4480-
RemainingIterations))
4481+
// TODO: We should also consider comparing against a scalable
4482+
// RemainingIterations when SCEV be able to evaluate non-canonical
4483+
// vscale-based expressions.
4484+
if (!ScalableRemIter) {
4485+
// Handle the case where NextVF and RemainingIterations are in different
4486+
// numerical spaces.
4487+
ElementCount EC = NextVF.Width;
4488+
if (NextVF.Width.isScalable())
4489+
EC = ElementCount::getFixed(
4490+
estimateElementCount(NextVF.Width, CM.getVScaleForTuning()));
4491+
if (SkipVF(SE.getElementCount(TCType, EC), RemainingIterations))
44814492
continue;
44824493
}
44834494

llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll

Lines changed: 17 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,8 @@ target triple = "aarch64-linux-gnu"
99
define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i32 %x) #0 {
1010
; CHECK-LABEL: define i64 @vector_loop_with_remaining_iterations(
1111
; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
12-
; CHECK-NEXT: [[ITER_CHECK:.*]]:
13-
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
14-
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
15-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 17, [[TMP1]]
16-
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
17-
; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
18-
; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
12+
; CHECK-NEXT: [[ENTRY:.*:]]
13+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
1914
; CHECK: [[VECTOR_PH]]:
2015
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[X]], i64 0
2116
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
@@ -39,58 +34,12 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3
3934
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4035
; CHECK: [[MIDDLE_BLOCK]]:
4136
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> [[TMP11]])
42-
; CHECK-NEXT: br label %[[VEC_EPILOG_ITER_CHECK:.*]]
43-
; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
44-
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ule i64 1, [[TMP1]]
45-
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
46-
; CHECK: [[VEC_EPILOG_PH]]:
47-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
48-
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP13]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
49-
; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
50-
; CHECK-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 2
51-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 17, [[TMP17]]
52-
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
53-
; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i64 [[TMP17]], i64 [[N_MOD_VF]]
54-
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 17, [[TMP19]]
55-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[X]], i64 0
56-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
57-
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <vscale x 2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0
58-
; CHECK-NEXT: [[TMP24:%.*]] = call <vscale x 2 x i32> @llvm.abs.nxv2i32(<vscale x 2 x i32> [[BROADCAST_SPLAT2]], i1 false)
59-
; CHECK-NEXT: [[TMP25:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
60-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
61-
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
62-
; CHECK-NEXT: [[TMP26:%.*]] = mul <vscale x 2 x i64> [[TMP25]], splat (i64 1)
63-
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> [[BROADCAST_SPLAT4]], [[TMP26]]
64-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP17]], i64 0
65-
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
66-
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
67-
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
68-
; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
69-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
70-
; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi <vscale x 2 x i64> [ [[TMP22]], %[[VEC_EPILOG_PH]] ], [ [[TMP35:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
71-
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], <vscale x 2 x i64> [[VEC_IND]], i32 0, i64 3
72-
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> align 1 [[TMP28]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i8> poison)
73-
; CHECK-NEXT: [[TMP29:%.*]] = zext <vscale x 2 x i8> [[WIDE_MASKED_GATHER]] to <vscale x 2 x i32>
74-
; CHECK-NEXT: [[TMP30:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP24]], <vscale x 2 x i32> [[TMP29]])
75-
; CHECK-NEXT: [[TMP31:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP24]], <vscale x 2 x i32> [[TMP30]])
76-
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX7]]
77-
; CHECK-NEXT: store <vscale x 2 x i8> zeroinitializer, ptr [[TMP32]], align 1
78-
; CHECK-NEXT: [[TMP34:%.*]] = zext <vscale x 2 x i32> [[TMP31]] to <vscale x 2 x i64>
79-
; CHECK-NEXT: [[TMP35]] = or <vscale x 2 x i64> [[VEC_PHI8]], [[TMP34]]
80-
; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX7]], [[TMP17]]
81-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT6]]
82-
; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC]]
83-
; CHECK-NEXT: br i1 [[TMP36]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
84-
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
85-
; CHECK-NEXT: [[TMP37:%.*]] = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> [[TMP35]])
86-
; CHECK-NEXT: br label %[[VEC_EPILOG_SCALAR_PH]]
87-
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
88-
; CHECK-NEXT: [[BC_RESUME_VAL10:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
89-
; CHECK-NEXT: [[BC_MERGE_RDX11:%.*]] = phi i64 [ [[TMP37]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP13]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
37+
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
38+
; CHECK: [[SCALAR_PH]]:
9039
; CHECK-NEXT: br label %[[LOOP:.*]]
9140
; CHECK: [[LOOP]]:
92-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL10]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
93-
; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX11]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
41+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
42+
; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[TMP13]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
9443
; CHECK-NEXT: [[GEP_SRC_I_I:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], i64 [[IV]], i32 0, i64 3
9544
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC_I_I]], align 1
9645
; CHECK-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i32
@@ -104,7 +53,7 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3
10453
; CHECK-NEXT: [[RED_NEXT]] = or i64 [[RED]], [[MIN_EXT]]
10554
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
10655
; CHECK-NEXT: [[EXITCOND_NOT_I_I:%.*]] = icmp eq i64 [[IV_NEXT]], 17
107-
; CHECK-NEXT: br i1 [[EXITCOND_NOT_I_I]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
56+
; CHECK-NEXT: br i1 [[EXITCOND_NOT_I_I]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
10857
; CHECK: [[EXIT]]:
10958
; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOP]] ]
11059
; CHECK-NEXT: ret i64 [[RED_NEXT_LCSSA]]
@@ -140,13 +89,8 @@ exit:
14089
define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr noalias %dst, i32 %x) #0 {
14190
; CHECK-LABEL: define i64 @main_vector_loop_fixed_with_no_remaining_iterations(
14291
; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
143-
; CHECK-NEXT: [[ITER_CHECK:.*]]:
144-
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
145-
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
146-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 17, [[TMP1]]
147-
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
148-
; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
149-
; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
92+
; CHECK-NEXT: [[ENTRY:.*:]]
93+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
15094
; CHECK: [[VECTOR_PH]]:
15195
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[X]], i64 0
15296
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
@@ -167,61 +111,15 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no
167111
; CHECK-NEXT: [[TMP11]] = or <16 x i64> [[VEC_PHI]], [[TMP10]]
168112
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
169113
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
170-
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
114+
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
171115
; CHECK: [[MIDDLE_BLOCK]]:
172116
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> [[TMP11]])
173-
; CHECK-NEXT: br label %[[VEC_EPILOG_ITER_CHECK:.*]]
174-
; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
175-
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ule i64 1, [[TMP1]]
176-
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]]
177-
; CHECK: [[VEC_EPILOG_PH]]:
178-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
179-
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP13]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
180-
; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
181-
; CHECK-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 2
182-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 17, [[TMP17]]
183-
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
184-
; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i64 [[TMP17]], i64 [[N_MOD_VF]]
185-
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 17, [[TMP19]]
186-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[X]], i64 0
187-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
188-
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <vscale x 2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0
189-
; CHECK-NEXT: [[TMP24:%.*]] = call <vscale x 2 x i32> @llvm.abs.nxv2i32(<vscale x 2 x i32> [[BROADCAST_SPLAT2]], i1 false)
190-
; CHECK-NEXT: [[TMP25:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
191-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
192-
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
193-
; CHECK-NEXT: [[TMP38:%.*]] = mul <vscale x 2 x i64> [[TMP25]], splat (i64 1)
194-
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> [[BROADCAST_SPLAT4]], [[TMP38]]
195-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP17]], i64 0
196-
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
197-
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
198-
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
199-
; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
200-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
201-
; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi <vscale x 2 x i64> [ [[TMP22]], %[[VEC_EPILOG_PH]] ], [ [[TMP35:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
202-
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], <vscale x 2 x i64> [[VEC_IND]], i32 0, i64 3
203-
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> align 1 [[TMP28]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i8> poison)
204-
; CHECK-NEXT: [[TMP29:%.*]] = zext <vscale x 2 x i8> [[WIDE_MASKED_GATHER]] to <vscale x 2 x i32>
205-
; CHECK-NEXT: [[TMP30:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP24]], <vscale x 2 x i32> [[TMP29]])
206-
; CHECK-NEXT: [[TMP31:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP24]], <vscale x 2 x i32> [[TMP30]])
207-
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX7]]
208-
; CHECK-NEXT: store <vscale x 2 x i8> zeroinitializer, ptr [[TMP32]], align 1
209-
; CHECK-NEXT: [[TMP34:%.*]] = zext <vscale x 2 x i32> [[TMP31]] to <vscale x 2 x i64>
210-
; CHECK-NEXT: [[TMP35]] = or <vscale x 2 x i64> [[VEC_PHI8]], [[TMP34]]
211-
; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX7]], [[TMP17]]
212-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT6]]
213-
; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC]]
214-
; CHECK-NEXT: br i1 [[TMP36]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
215-
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
216-
; CHECK-NEXT: [[TMP37:%.*]] = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> [[TMP35]])
217-
; CHECK-NEXT: br label %[[VEC_EPILOG_SCALAR_PH]]
218-
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
219-
; CHECK-NEXT: [[BC_RESUME_VAL10:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
220-
; CHECK-NEXT: [[BC_MERGE_RDX11:%.*]] = phi i64 [ [[TMP37]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP13]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
117+
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
118+
; CHECK: [[SCALAR_PH]]:
221119
; CHECK-NEXT: br label %[[LOOP:.*]]
222120
; CHECK: [[LOOP]]:
223-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL10]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
224-
; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX11]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
121+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
122+
; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[TMP13]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
225123
; CHECK-NEXT: [[GEP_SRC_I_I:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], i64 [[IV]], i32 0, i64 3
226124
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC_I_I]], align 1
227125
; CHECK-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i32
@@ -235,7 +133,7 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no
235133
; CHECK-NEXT: [[RED_NEXT]] = or i64 [[RED]], [[MIN_EXT]]
236134
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
237135
; CHECK-NEXT: [[EXITCOND_NOT_I_I:%.*]] = icmp eq i64 [[IV_NEXT]], 17
238-
; CHECK-NEXT: br i1 [[EXITCOND_NOT_I_I]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
136+
; CHECK-NEXT: br i1 [[EXITCOND_NOT_I_I]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
239137
; CHECK: [[EXIT]]:
240138
; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOP]] ]
241139
; CHECK-NEXT: ret i64 [[RED_NEXT_LCSSA]]
@@ -308,7 +206,7 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks(
308206
; CHECK-NEXT: store i64 0, ptr [[L]], align 8
309207
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
310208
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4
311-
; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
209+
; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
312210
; CHECK: [[MIDDLE_BLOCK]]:
313211
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
314212
; CHECK: [[SCALAR_PH]]:
@@ -332,7 +230,7 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks(
332230
; CHECK-NEXT: store i64 0, ptr [[L]], align 8
333231
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV1]], 2
334232
; CHECK-NEXT: [[EC:%.*]] = icmp ult i64 [[IV1]], 14
335-
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP11:![0-9]+]]
233+
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP8:![0-9]+]]
336234
; CHECK: [[EXIT]]:
337235
; CHECK-NEXT: ret void
338236
;

0 commit comments

Comments
 (0)