Skip to content

Commit 158f82a

Browse files
committed
[VPlan] Replace EVL branch condition with (branch-on-count AVLNext, 0)
This changes the branch condition to use the AVL's backedge value instead of the EVL-based IV. This allows us to emit bnez on RISC-V and removes a use of the trip count, which should reduce register pressure. To help match the AVL's backedge value, I've added some new pattern matchers. The m_Phi matcher is variadic in the number of operands it accepts, so I had to add a new template argument to Recipe_match to relax the assertion that the number of operands must exactly match the template operand types. I've also used the new m_Sub matcher in a couple of other places that were already pattern matching on subs. Happy to split this out if reviewers prefer. Fixes #151459
1 parent 2e0ddbb commit 158f82a

32 files changed

+285
-223
lines changed

llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h

Lines changed: 53 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ inline bind_ty<VPValue> m_VPValue(VPValue *&V) { return V; }
195195
/// Match a VPInstruction, capturing if we match.
196196
inline bind_ty<VPInstruction> m_VPInstruction(VPInstruction *&V) { return V; }
197197

198-
template <typename Ops_t, unsigned Opcode, bool Commutative,
198+
template <typename Ops_t, unsigned Opcode, bool Commutative, bool Variadic,
199199
typename... RecipeTys>
200200
struct Recipe_match {
201201
Ops_t Ops;
@@ -231,9 +231,12 @@ struct Recipe_match {
231231
if ((!matchRecipeAndOpcode<RecipeTys>(R) && ...))
232232
return false;
233233

234-
assert(R->getNumOperands() == std::tuple_size<Ops_t>::value &&
235-
"recipe with matched opcode does not have the expected number of "
236-
"operands");
234+
if (R->getNumOperands() != std::tuple_size<Ops_t>::value) {
235+
assert(Variadic && "non-variadic recipe with matched opcode does not "
236+
"have the expected number of "
237+
"operands");
238+
return false;
239+
}
237240

238241
auto IdxSeq = std::make_index_sequence<std::tuple_size<Ops_t>::value>();
239242
if (all_of_tuple_elements(IdxSeq, [R](auto Op, unsigned Idx) {
@@ -256,7 +259,9 @@ struct Recipe_match {
256259
std::is_same<RecipeTy, VPCanonicalIVPHIRecipe>::value ||
257260
std::is_same<RecipeTy, VPWidenSelectRecipe>::value ||
258261
std::is_same<RecipeTy, VPDerivedIVRecipe>::value ||
259-
std::is_same<RecipeTy, VPWidenGEPRecipe>::value)
262+
std::is_same<RecipeTy, VPWidenGEPRecipe>::value ||
263+
std::is_same<RecipeTy, VPWidenPHIRecipe>::value ||
264+
std::is_same<RecipeTy, VPHeaderPHIRecipe>::value)
260265
return DefR;
261266
else
262267
return DefR && DefR->getOpcode() == Opcode;
@@ -272,11 +277,11 @@ struct Recipe_match {
272277

273278
template <unsigned Opcode, typename... RecipeTys>
274279
using ZeroOpRecipe_match =
275-
Recipe_match<std::tuple<>, Opcode, false, RecipeTys...>;
280+
Recipe_match<std::tuple<>, Opcode, false, false, RecipeTys...>;
276281

277282
template <typename Op0_t, unsigned Opcode, typename... RecipeTys>
278283
using UnaryRecipe_match =
279-
Recipe_match<std::tuple<Op0_t>, Opcode, false, RecipeTys...>;
284+
Recipe_match<std::tuple<Op0_t>, Opcode, false, false, RecipeTys...>;
280285

281286
template <typename Op0_t, unsigned Opcode>
282287
using UnaryVPInstruction_match =
@@ -293,7 +298,8 @@ using AllUnaryRecipe_match =
293298
template <typename Op0_t, typename Op1_t, unsigned Opcode, bool Commutative,
294299
typename... RecipeTys>
295300
using BinaryRecipe_match =
296-
Recipe_match<std::tuple<Op0_t, Op1_t>, Opcode, Commutative, RecipeTys...>;
301+
Recipe_match<std::tuple<Op0_t, Op1_t>, Opcode, Commutative,
302+
/*Variadic*/ false, RecipeTys...>;
297303

298304
template <typename Op0_t, typename Op1_t, unsigned Opcode>
299305
using BinaryVPInstruction_match =
@@ -302,8 +308,9 @@ using BinaryVPInstruction_match =
302308

303309
template <typename Op0_t, typename Op1_t, typename Op2_t, unsigned Opcode,
304310
bool Commutative, typename... RecipeTys>
305-
using TernaryRecipe_match = Recipe_match<std::tuple<Op0_t, Op1_t, Op2_t>,
306-
Opcode, Commutative, RecipeTys...>;
311+
using TernaryRecipe_match =
312+
Recipe_match<std::tuple<Op0_t, Op1_t, Op2_t>, Opcode, Commutative,
313+
/*Variadic*/ false, RecipeTys...>;
307314

308315
template <typename Op0_t, typename Op1_t, typename Op2_t, unsigned Opcode>
309316
using TernaryVPInstruction_match =
@@ -343,8 +350,9 @@ m_VPInstruction(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
343350

344351
template <typename Op0_t, typename Op1_t, typename Op2_t, typename Op3_t,
345352
unsigned Opcode, bool Commutative, typename... RecipeTys>
346-
using Recipe4Op_match = Recipe_match<std::tuple<Op0_t, Op1_t, Op2_t, Op3_t>,
347-
Opcode, Commutative, RecipeTys...>;
353+
using Recipe4Op_match =
354+
Recipe_match<std::tuple<Op0_t, Op1_t, Op2_t, Op3_t>, Opcode, Commutative,
355+
/*Variadic*/ false, RecipeTys...>;
348356

349357
template <typename Op0_t, typename Op1_t, typename Op2_t, typename Op3_t,
350358
unsigned Opcode>
@@ -378,6 +386,12 @@ m_Broadcast(const Op0_t &Op0) {
378386
return m_VPInstruction<VPInstruction::Broadcast>(Op0);
379387
}
380388

389+
template <typename Op0_t>
390+
inline UnaryVPInstruction_match<Op0_t, VPInstruction::ExplicitVectorLength>
391+
m_ExplicitVectorLength(const Op0_t &Op0) {
392+
return m_VPInstruction<VPInstruction::ExplicitVectorLength>(Op0);
393+
}
394+
381395
template <typename Op0_t, typename Op1_t>
382396
inline BinaryVPInstruction_match<Op0_t, Op1_t, VPInstruction::ActiveLaneMask>
383397
m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1) {
@@ -418,6 +432,12 @@ m_ZExtOrSExt(const Op0_t &Op0) {
418432
return m_CombineOr(m_ZExt(Op0), m_SExt(Op0));
419433
}
420434

435+
template <typename Op0_t>
436+
inline match_combine_or<AllUnaryRecipe_match<Op0_t, Instruction::ZExt>, Op0_t>
437+
m_ZExtOrSelf(const Op0_t &Op0) {
438+
return m_CombineOr(m_ZExt(Op0), Op0);
439+
}
440+
421441
template <unsigned Opcode, typename Op0_t, typename Op1_t,
422442
bool Commutative = false>
423443
inline AllBinaryRecipe_match<Op0_t, Op1_t, Opcode, Commutative>
@@ -431,6 +451,12 @@ m_c_Binary(const Op0_t &Op0, const Op1_t &Op1) {
431451
return AllBinaryRecipe_match<Op0_t, Op1_t, Opcode, true>(Op0, Op1);
432452
}
433453

454+
template <typename Op0_t, typename Op1_t>
455+
inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Sub>
456+
m_Sub(const Op0_t &Op0, const Op1_t &Op1) {
457+
return m_Binary<Instruction::Sub, Op0_t, Op1_t>(Op0, Op1);
458+
}
459+
434460
template <typename Op0_t, typename Op1_t>
435461
inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Mul>
436462
m_Mul(const Op0_t &Op0, const Op1_t &Op1) {
@@ -476,7 +502,8 @@ inline GEPLikeRecipe_match<Op0_t, Op1_t> m_GetElementPtr(const Op0_t &Op0,
476502
template <typename Op0_t, typename Op1_t, typename Op2_t, unsigned Opcode>
477503
using AllTernaryRecipe_match =
478504
Recipe_match<std::tuple<Op0_t, Op1_t, Op2_t>, Opcode, false,
479-
VPReplicateRecipe, VPInstruction, VPWidenSelectRecipe>;
505+
/*Variadic*/ false, VPReplicateRecipe, VPInstruction,
506+
VPWidenSelectRecipe>;
480507

481508
template <typename Op0_t, typename Op1_t, typename Op2_t>
482509
inline AllTernaryRecipe_match<Op0_t, Op1_t, Op2_t, Instruction::Select>
@@ -524,14 +551,26 @@ m_ScalarIVSteps(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
524551

525552
template <typename Op0_t, typename Op1_t, typename Op2_t>
526553
using VPDerivedIV_match =
527-
Recipe_match<std::tuple<Op0_t, Op1_t, Op2_t>, 0, false, VPDerivedIVRecipe>;
554+
Recipe_match<std::tuple<Op0_t, Op1_t, Op2_t>, 0, false, /*Variadic*/ false,
555+
VPDerivedIVRecipe>;
528556

529557
template <typename Op0_t, typename Op1_t, typename Op2_t>
530558
inline VPDerivedIV_match<Op0_t, Op1_t, Op2_t>
531559
m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
532560
return VPDerivedIV_match<Op0_t, Op1_t, Op2_t>({Op0, Op1, Op2});
533561
}
534562

563+
template <typename... OpTys>
564+
using PhiLikeRecipe_match =
565+
Recipe_match<std::tuple<OpTys...>, Instruction::PHI, false, true,
566+
VPWidenPHIRecipe, VPHeaderPHIRecipe, VPInstruction>;
567+
568+
template <typename Op0_t, typename Op1_t, typename... OpTys>
569+
inline PhiLikeRecipe_match<Op0_t, Op1_t, OpTys...>
570+
m_Phi(const Op0_t &Op0, const Op1_t &Op1, const OpTys &...Ops) {
571+
return PhiLikeRecipe_match<Op0_t, Op1_t, OpTys...>(Op0, Op1, Ops...);
572+
}
573+
535574
/// Match a call argument at a given argument index.
536575
template <typename Opnd_t> struct Argument_match {
537576
/// Call argument index to match.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -328,8 +328,7 @@ VPPartialReductionRecipe::computeCost(ElementCount VF,
328328
// Pick out opcode, type/ext information and use sub side effects from a widen
329329
// recipe.
330330
auto HandleWiden = [&](VPWidenRecipe *Widen) {
331-
if (match(Widen,
332-
m_Binary<Instruction::Sub>(m_SpecificInt(0), m_VPValue(Op)))) {
331+
if (match(Widen, m_Sub(m_SpecificInt(0), m_VPValue(Op)))) {
333332
Widen = dyn_cast<VPWidenRecipe>(Op->getDefiningRecipe());
334333
}
335334
Opcode = Widen->getOpcode();

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -740,8 +740,7 @@ static VPWidenInductionRecipe *getOptimizableIVOf(VPValue *VPV) {
740740
// IVStep will be the negated step of the subtraction. Check if Step == -1
741741
// * IVStep.
742742
VPValue *Step;
743-
if (!match(VPV,
744-
m_Binary<Instruction::Sub>(m_VPValue(), m_VPValue(Step))) ||
743+
if (!match(VPV, m_Sub(m_VPValue(), m_VPValue(Step))) ||
745744
!Step->isLiveIn() || !IVStep->isLiveIn())
746745
return false;
747746
auto *StepCI = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
@@ -2386,19 +2385,38 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) {
23862385
// Find EVL loop entries by locating VPEVLBasedIVPHIRecipe.
23872386
// There should be only one EVL PHI in the entire plan.
23882387
VPEVLBasedIVPHIRecipe *EVLPhi = nullptr;
2388+
VPValue *AVLNext = nullptr;
23892389

23902390
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
23912391
vp_depth_first_shallow(Plan.getEntry())))
2392-
for (VPRecipeBase &R : VPBB->phis())
2393-
if (auto *PhiR = dyn_cast<VPEVLBasedIVPHIRecipe>(&R)) {
2392+
for (VPRecipeBase &R : VPBB->phis()) {
2393+
auto *PhiR = dyn_cast<VPSingleDefRecipe>(&R);
2394+
if (!PhiR)
2395+
continue;
2396+
VPValue *Backedge;
2397+
if (auto *EVL = dyn_cast<VPEVLBasedIVPHIRecipe>(PhiR)) {
23942398
assert(!EVLPhi && "Found multiple EVL PHIs. Only one expected");
2395-
EVLPhi = PhiR;
2399+
EVLPhi = EVL;
2400+
continue;
23962401
}
2402+
if (match(PhiR,
2403+
m_Phi(m_Specific(Plan.getTripCount()), m_VPValue(Backedge))) &&
2404+
match(Backedge, m_Sub(m_Specific(PhiR),
2405+
m_ZExtOrSelf(m_ExplicitVectorLength(m_CombineOr(
2406+
m_Specific(PhiR),
2407+
// The AVL may be capped to a safe distance.
2408+
m_Select(m_VPValue(), m_Specific(PhiR),
2409+
m_VPValue()))))))) {
2410+
AVLNext = Backedge;
2411+
}
2412+
}
23972413

23982414
// Early return if no EVL PHI is found.
23992415
if (!EVLPhi)
24002416
return;
24012417

2418+
assert(AVLNext && "Didn't find AVL backedge?");
2419+
24022420
VPBasicBlock *HeaderVPBB = EVLPhi->getParent();
24032421
VPValue *EVLIncrement = EVLPhi->getBackedgeValue();
24042422

@@ -2425,7 +2443,7 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) {
24252443

24262444
// Replace the use of VectorTripCount in the latch-exiting block.
24272445
// Before: (branch-on-count EVLIVInc, VectorTripCount)
2428-
// After: (branch-on-count EVLIVInc, TripCount)
2446+
// After: (branch-on-count AVLNext, 0)
24292447

24302448
VPBasicBlock *LatchExiting =
24312449
HeaderVPBB->getPredecessors()[1]->getEntryBasicBlock();
@@ -2438,7 +2456,12 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) {
24382456
m_BranchOnCount(m_VPValue(EVLIncrement),
24392457
m_Specific(&Plan.getVectorTripCount()))) &&
24402458
"Unexpected terminator in EVL loop");
2441-
LatchExitingBr->setOperand(1, Plan.getTripCount());
2459+
2460+
Type *AVLTy = VPTypeAnalysis(Plan).inferScalarType(AVLNext);
2461+
2462+
LatchExitingBr->setOperand(0, AVLNext);
2463+
LatchExitingBr->setOperand(
2464+
1, Plan.getOrAddLiveIn(ConstantInt::getNullValue(AVLTy)));
24422465
}
24432466

24442467
void VPlanTransforms::dropPoisonGeneratingRecipes(

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -216,9 +216,10 @@ struct VPlanTransforms {
216216
/// variable vector lengths instead of fixed lengths. This transformation:
217217
/// * Makes EVL-Phi concrete.
218218
/// * Removes CanonicalIV and increment.
219-
/// * Replaces fixed-length stepping (branch-on-cond CanonicalIVInc,
220-
/// VectorTripCount) with variable-length stepping (branch-on-cond
221-
/// EVLIVInc, TripCount).
219+
/// * Replaces the exit condition from
220+
/// (branch-on-count CanonicalIVInc, VectorTripCount)
221+
/// to
222+
/// (branch-on-count AVLNext, 0)
222223
static void canonicalizeEVLLoops(VPlan &Plan);
223224

224225
/// Lower abstract recipes to concrete ones, that can be codegen'd. Use \p

llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ define void @test_wide_integer_induction(ptr noalias %a, i64 %N) {
3737
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
3838
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
3939
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
40-
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
41-
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
40+
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
41+
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4242
; CHECK: middle.block:
4343
; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]]
4444
; CHECK: scalar.ph:

llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
141141
; IF-EVL-OUTLOOP-NEXT: [[TMP10]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP5]])
142142
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP5]], [[EVL_BASED_IV]]
143143
; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP5]]
144-
; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], [[N]]
144+
; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[AVL_NEXT]], 0
145145
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
146146
; IF-EVL-OUTLOOP: middle.block:
147147
; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP10]])
@@ -195,7 +195,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
195195
; IF-EVL-INLOOP-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]]
196196
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP6]], [[EVL_BASED_IV]]
197197
; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i32 [[TMP5]], [[TMP6]]
198-
; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], [[N]]
198+
; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[AVL_NEXT]], 0
199199
; IF-EVL-INLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
200200
; IF-EVL-INLOOP: middle.block:
201201
; IF-EVL-INLOOP-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
@@ -362,8 +362,8 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
362362
; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
363363
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
364364
; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
365-
; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
366-
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
365+
; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
366+
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
367367
; IF-EVL-OUTLOOP: middle.block:
368368
; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> [[TMP15]])
369369
; IF-EVL-OUTLOOP-NEXT: br label [[FOR_END:%.*]]
@@ -410,7 +410,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
410410
; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = zext i32 [[TMP9]] to i64
411411
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]]
412412
; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]]
413-
; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
413+
; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
414414
; IF-EVL-INLOOP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
415415
; IF-EVL-INLOOP: middle.block:
416416
; IF-EVL-INLOOP-NEXT: br label [[FOR_END:%.*]]

llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali
143143
; PREDICATED_DATA-WITH-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP1]], [[EVL_BASED_IV]]
144144
; PREDICATED_DATA-WITH-EVL-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP1]]
145145
; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
146-
; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], 1024
146+
; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP15:%.*]] = icmp eq i32 [[AVL_NEXT]], 0
147147
; PREDICATED_DATA-WITH-EVL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
148148
; PREDICATED_DATA-WITH-EVL: middle.block:
149149
; PREDICATED_DATA-WITH-EVL-NEXT: br label [[FOR_END:%.*]]
@@ -334,7 +334,7 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali
334334
; PREDICATED_DATA-WITH-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP1]], [[EVL_BASED_IV]]
335335
; PREDICATED_DATA-WITH-EVL-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP1]]
336336
; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
337-
; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], 1024
337+
; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP19:%.*]] = icmp eq i32 [[AVL_NEXT]], 0
338338
; PREDICATED_DATA-WITH-EVL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
339339
; PREDICATED_DATA-WITH-EVL: middle.block:
340340
; PREDICATED_DATA-WITH-EVL-NEXT: br label [[FOR_END:%.*]]

llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ define void @test(ptr %p, i64 %a, i8 %b) {
5050
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP11]], [[EVL_BASED_IV]]
5151
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP11]]
5252
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT8]]
53-
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], 9
54-
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]
53+
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[AVL_NEXT]], 0
54+
; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]
5555
; CHECK: middle.block:
5656
; CHECK-NEXT: br label [[EXIT1:%.*]]
5757
; CHECK: scalar.ph:

0 commit comments

Comments
 (0)