Skip to content

Commit f3520c5

Browse files
authored
[VPlan] Replace EVL branch condition with (branch-on-count AVLNext, 0) (#152167)
This changes the branch condition to use the AVL's backedge value instead of the EVL-based IV. This allows us to emit bnez on RISC-V and removes a use of the trip count, which should reduce register pressure. To match phis with VPlanPatternMatch I've had to relax the assert that the number of operands must exactly match the pattern for the Phi opcode, and I've copied over m_ZExtOrSelf from the LLVM IR PatternMatch.h. Fixes #151459
1 parent 3d498e5 commit f3520c5

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+451
-473
lines changed

llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -218,9 +218,12 @@ struct Recipe_match {
218218
if ((!matchRecipeAndOpcode<RecipeTys>(R) && ...))
219219
return false;
220220

221-
assert(R->getNumOperands() == std::tuple_size<Ops_t>::value &&
222-
"recipe with matched opcode does not have the expected number of "
223-
"operands");
221+
if (R->getNumOperands() != std::tuple_size<Ops_t>::value) {
222+
assert(Opcode == Instruction::PHI &&
223+
"non-variadic recipe with matched opcode does not have the "
224+
"expected number of operands");
225+
return false;
226+
}
224227

225228
auto IdxSeq = std::make_index_sequence<std::tuple_size<Ops_t>::value>();
226229
if (all_of_tuple_elements(IdxSeq, [R](auto Op, unsigned Idx) {
@@ -301,11 +304,18 @@ m_Broadcast(const Op0_t &Op0) {
301304
return m_VPInstruction<VPInstruction::Broadcast>(Op0);
302305
}
303306

307+
template <typename Op0_t>
308+
inline VPInstruction_match<VPInstruction::ExplicitVectorLength, Op0_t>
309+
m_EVL(const Op0_t &Op0) {
310+
return m_VPInstruction<VPInstruction::ExplicitVectorLength>(Op0);
311+
}
312+
304313
template <typename Op0_t>
305314
inline VPInstruction_match<VPInstruction::ExtractLastElement, Op0_t>
306315
m_ExtractLastElement(const Op0_t &Op0) {
307316
return m_VPInstruction<VPInstruction::ExtractLastElement>(Op0);
308317
}
318+
309319
template <typename Op0_t, typename Op1_t>
310320
inline VPInstruction_match<VPInstruction::ActiveLaneMask, Op0_t, Op1_t>
311321
m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1) {
@@ -345,6 +355,12 @@ m_ZExtOrSExt(const Op0_t &Op0) {
345355
return m_CombineOr(m_ZExt(Op0), m_SExt(Op0));
346356
}
347357

358+
template <typename Op0_t>
359+
inline match_combine_or<AllRecipe_match<Instruction::ZExt, Op0_t>, Op0_t>
360+
m_ZExtOrSelf(const Op0_t &Op0) {
361+
return m_CombineOr(m_ZExt(Op0), Op0);
362+
}
363+
348364
template <unsigned Opcode, typename Op0_t, typename Op1_t>
349365
inline AllRecipe_match<Opcode, Op0_t, Op1_t> m_Binary(const Op0_t &Op0,
350366
const Op1_t &Op1) {

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2473,6 +2473,22 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) {
24732473

24742474
VPBasicBlock *HeaderVPBB = EVLPhi->getParent();
24752475
VPValue *EVLIncrement = EVLPhi->getBackedgeValue();
2476+
VPValue *AVL;
2477+
[[maybe_unused]] bool FoundAVL =
2478+
match(EVLIncrement,
2479+
m_c_Add(m_ZExtOrSelf(m_EVL(m_VPValue(AVL))), m_Specific(EVLPhi)));
2480+
assert(FoundAVL && "Didn't find AVL?");
2481+
2482+
// The AVL may be capped to a safe distance.
2483+
VPValue *SafeAVL;
2484+
if (match(AVL, m_Select(m_VPValue(), m_VPValue(SafeAVL), m_VPValue())))
2485+
AVL = SafeAVL;
2486+
2487+
VPValue *AVLNext;
2488+
[[maybe_unused]] bool FoundAVLNext =
2489+
match(AVL, m_VPInstruction<Instruction::PHI>(
2490+
m_Specific(Plan.getTripCount()), m_VPValue(AVLNext)));
2491+
assert(FoundAVLNext && "Didn't find AVL backedge?");
24762492

24772493
// Convert EVLPhi to concrete recipe.
24782494
auto *ScalarR =
@@ -2496,7 +2512,7 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) {
24962512

24972513
// Replace the use of VectorTripCount in the latch-exiting block.
24982514
// Before: (branch-on-count EVLIVInc, VectorTripCount)
2499-
// After: (branch-on-count EVLIVInc, TripCount)
2515+
// After: (branch-on-cond eq AVLNext, 0)
25002516

25012517
VPBasicBlock *LatchExiting =
25022518
HeaderVPBB->getPredecessors()[1]->getEntryBasicBlock();
@@ -2509,7 +2525,14 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) {
25092525
m_BranchOnCount(m_VPValue(EVLIncrement),
25102526
m_Specific(&Plan.getVectorTripCount()))) &&
25112527
"Unexpected terminator in EVL loop");
2512-
LatchExitingBr->setOperand(1, Plan.getTripCount());
2528+
2529+
Type *AVLTy = VPTypeAnalysis(Plan).inferScalarType(AVLNext);
2530+
VPBuilder Builder(LatchExitingBr);
2531+
VPValue *Cmp =
2532+
Builder.createICmp(CmpInst::ICMP_EQ, AVLNext,
2533+
Plan.getOrAddLiveIn(ConstantInt::getNullValue(AVLTy)));
2534+
Builder.createNaryOp(VPInstruction::BranchOnCond, Cmp);
2535+
LatchExitingBr->eraseFromParent();
25132536
}
25142537

25152538
void VPlanTransforms::dropPoisonGeneratingRecipes(

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -219,9 +219,10 @@ struct VPlanTransforms {
219219
/// variable vector lengths instead of fixed lengths. This transformation:
220220
/// * Makes EVL-Phi concrete.
221221
/// * Removes CanonicalIV and increment.
222-
/// * Replaces fixed-length stepping (branch-on-cond CanonicalIVInc,
223-
/// VectorTripCount) with variable-length stepping (branch-on-cond
224-
/// EVLIVInc, TripCount).
222+
/// * Replaces the exit condition from
223+
/// (branch-on-count CanonicalIVInc, VectorTripCount)
224+
/// to
225+
/// (branch-on-cond eq AVLNext, 0)
225226
static void canonicalizeEVLLoops(VPlan &Plan);
226227

227228
/// Lower abstract recipes to concrete ones, that can be codegen'd.

llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) {
4242
; ZVFBFMIN-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64
4343
; ZVFBFMIN-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[TMP0]]
4444
; ZVFBFMIN-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]]
45-
; ZVFBFMIN-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
46-
; ZVFBFMIN-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
45+
; ZVFBFMIN-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
46+
; ZVFBFMIN-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4747
; ZVFBFMIN: [[MIDDLE_BLOCK]]:
4848
; ZVFBFMIN-NEXT: br label %[[EXIT:.*]]
4949
; ZVFBFMIN: [[SCALAR_PH]]:
@@ -151,8 +151,8 @@ define void @vfwmaccbf16.vv(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64
151151
; ZVFBFMIN-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64
152152
; ZVFBFMIN-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP12]], [[TMP6]]
153153
; ZVFBFMIN-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]]
154-
; ZVFBFMIN-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
155-
; ZVFBFMIN-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
154+
; ZVFBFMIN-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
155+
; ZVFBFMIN-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
156156
; ZVFBFMIN: [[MIDDLE_BLOCK]]:
157157
; ZVFBFMIN-NEXT: br label %[[EXIT:.*]]
158158
; ZVFBFMIN: [[SCALAR_PH]]:

llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll

Lines changed: 10 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 {
1818
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP13]]
1919
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
2020
; CHECK: [[VECTOR_BODY]]:
21-
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
2221
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
2322
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
2423
; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
@@ -29,10 +28,9 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 {
2928
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 8 x i64> [[VEC_IND]]
3029
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> align 2 [[TMP20]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP12]])
3130
; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP12]] to i64
32-
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]]
3331
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]]
3432
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
35-
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP2]]
33+
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
3634
; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
3735
; CHECK: [[MIDDLE_BLOCK]]:
3836
; CHECK-NEXT: br label %[[EXIT:.*]]
@@ -94,7 +92,6 @@ define void @block_with_dead_inst_2(ptr %src) #0 {
9492
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP8]]
9593
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
9694
; CHECK: [[VECTOR_BODY]]:
97-
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
9895
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
9996
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 333, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
10097
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
@@ -105,10 +102,9 @@ define void @block_with_dead_inst_2(ptr %src) #0 {
105102
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 8 x i64> [[VEC_IND]]
106103
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> align 2 [[TMP10]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP9]])
107104
; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP9]] to i64
108-
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[EVL_BASED_IV]]
109105
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
110106
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
111-
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 333
107+
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
112108
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
113109
; CHECK: [[MIDDLE_BLOCK]]:
114110
; CHECK-NEXT: br label %[[EXIT:.*]]
@@ -170,7 +166,6 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 {
170166
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP8]]
171167
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
172168
; CHECK: [[VECTOR_BODY]]:
173-
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
174169
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
175170
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 333, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
176171
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
@@ -181,10 +176,9 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 {
181176
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 8 x i64> [[VEC_IND]]
182177
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> align 2 [[TMP10]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP9]])
183178
; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP9]] to i64
184-
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[EVL_BASED_IV]]
185179
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
186180
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
187-
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 333
181+
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
188182
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
189183
; CHECK: [[MIDDLE_BLOCK]]:
190184
; CHECK-NEXT: br label %[[EXIT:.*]]
@@ -256,7 +250,6 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 {
256250
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP13]]
257251
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
258252
; CHECK: [[VECTOR_BODY]]:
259-
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
260253
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
261254
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
262255
; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
@@ -267,10 +260,9 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 {
267260
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 8 x i64> [[VEC_IND]]
268261
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> align 2 [[TMP20]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP12]])
269262
; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP12]] to i64
270-
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]]
271263
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]]
272264
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
273-
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP2]]
265+
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
274266
; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
275267
; CHECK: [[MIDDLE_BLOCK]]:
276268
; CHECK-NEXT: br label %[[EXIT:.*]]
@@ -344,7 +336,6 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 {
344336
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP8]]
345337
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
346338
; CHECK: [[VECTOR_BODY]]:
347-
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
348339
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
349340
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 333, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
350341
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
@@ -355,10 +346,9 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 {
355346
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 8 x i64> [[VEC_IND]]
356347
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> align 2 [[TMP10]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP9]])
357348
; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP9]] to i64
358-
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[EVL_BASED_IV]]
359349
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
360350
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
361-
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 333
351+
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
362352
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
363353
; CHECK: [[MIDDLE_BLOCK]]:
364354
; CHECK-NEXT: br label %[[EXIT:.*]]
@@ -443,7 +433,6 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 %
443433
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP13]]
444434
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
445435
; CHECK: [[VECTOR_BODY]]:
446-
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
447436
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
448437
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
449438
; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
@@ -467,10 +456,9 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 %
467456
; CHECK-NEXT: [[TMP24:%.*]] = or <vscale x 8 x i1> [[TMP22]], [[TMP23]]
468457
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> align 2 [[TMP20]], <vscale x 8 x i1> [[TMP24]], i32 [[TMP27]])
469458
; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP27]] to i64
470-
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP25]], [[EVL_BASED_IV]]
471459
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP25]]
472460
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
473-
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP2]]
461+
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
474462
; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
475463
; CHECK: [[MIDDLE_BLOCK]]:
476464
; CHECK-NEXT: br label %[[EXIT:.*]]
@@ -562,7 +550,7 @@ define void @empty_block_with_phi_1(ptr %src, i64 %N) #0 {
562550
; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP13]] to i64
563551
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP11]], [[TMP9]]
564552
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
565-
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
553+
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
566554
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
567555
; CHECK: [[MIDDLE_BLOCK]]:
568556
; CHECK-NEXT: br label %[[EXIT:.*]]
@@ -637,7 +625,7 @@ define void @empty_block_with_phi_2(ptr %src, i64 %N) #0 {
637625
; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP13]] to i64
638626
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP11]], [[TMP9]]
639627
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
640-
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
628+
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
641629
; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
642630
; CHECK: [[MIDDLE_BLOCK]]:
643631
; CHECK-NEXT: br label %[[EXIT:.*]]
@@ -727,7 +715,6 @@ define void @dead_load_in_block(ptr %dst, ptr %src, i8 %N, i64 %x) #0 {
727715
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP25]]
728716
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
729717
; CHECK: [[VECTOR_BODY]]:
730-
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
731718
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
732719
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP3]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
733720
; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
@@ -738,11 +725,10 @@ define void @dead_load_in_block(ptr %dst, ptr %src, i8 %N, i64 %x) #0 {
738725
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[DST]], <vscale x 4 x i64> [[VEC_IND]]
739726
; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> align 4 [[TMP21]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP18]]), !alias.scope [[META19:![0-9]+]], !noalias [[META22:![0-9]+]]
740727
; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP18]] to i64
741-
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[EVL_BASED_IV]]
742728
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]]
743729
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
744-
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP3]]
745-
; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
730+
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
731+
; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
746732
; CHECK: [[MIDDLE_BLOCK]]:
747733
; CHECK-NEXT: br label %[[EXIT:.*]]
748734
; CHECK: [[SCALAR_PH]]:

0 commit comments

Comments
 (0)