Skip to content

Commit c487358

Browse files
committed
[VPlan] Move addExplicitVectorLength to tryToBuildVPlanWithVPRecipes
Stacked on llvm#166158. Currently we convert a VPlan to an EVL tail folded one after the VPlan is built and optimized, which doesn't match how we handle regular tail folding. This addresses a long-standing TODO by performing it much earlier in the pipeline before any optimizations are run, and simultaneously splits out optimizeMaskToEVL into a separate pass to be run during VPlanTransforms::optimize. This way the two parts of EVL tail folding are separated into those needed for correctness and those that are an optimization. - We don't need to remove the old recipes ourselves anymore and can leave it to removeDeadRecipes - createScalarIVSteps needs to be updated to use the EVL based IV if it exists, so a helper method was added to VPlan to extract it - VPlanVerifier was updated to check that the EVL based IV always immediately follows the canonical IV. Because we now optimize the VPlan after the EVL stuff is added, some simplifications, e.g. replacing a scalar-steps when UF=1, kick in for the initial VPlan. Fixes llvm#153144
1 parent 06b3529 commit c487358

18 files changed

+120
-119
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8232,10 +8232,6 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
82328232
VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths,
82338233
*Plan, CM.getMinimalBitwidths());
82348234
VPlanTransforms::runPass(VPlanTransforms::optimize, *Plan);
8235-
// TODO: try to put it close to addActiveLaneMask().
8236-
if (CM.foldTailWithEVL())
8237-
VPlanTransforms::runPass(VPlanTransforms::addExplicitVectorLength,
8238-
*Plan, CM.getMaxSafeElements());
82398235
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
82408236
VPlans.push_back(std::move(Plan));
82418237
}
@@ -8499,6 +8495,9 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
84998495
}
85008496
VPlanTransforms::optimizeInductionExitUsers(*Plan, IVEndValues, *PSE.getSE());
85018497

8498+
if (CM.foldTailWithEVL())
8499+
VPlanTransforms::addExplicitVectorLength(*Plan, CM.getMaxSafeElements());
8500+
85028501
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
85038502
return Plan;
85048503
}

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4114,6 +4114,11 @@ class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
41144114
return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
41154115
}
41164116

4117+
VPEVLBasedIVPHIRecipe *getEVLBasedIV() {
4118+
return dyn_cast<VPEVLBasedIVPHIRecipe>(
4119+
std::next(getCanonicalIV()->getIterator()));
4120+
}
4121+
41174122
/// Return the type of the canonical IV for loop regions.
41184123
Type *getCanonicalIVType() { return getCanonicalIV()->getScalarType(); }
41194124
const Type *getCanonicalIVType() const {

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 41 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -605,9 +605,11 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
605605
VPBuilder &Builder) {
606606
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
607607
VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
608-
VPCanonicalIVPHIRecipe *CanonicalIV = LoopRegion->getCanonicalIV();
609-
VPSingleDefRecipe *BaseIV = Builder.createDerivedIV(
610-
Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
608+
VPValue *IV = LoopRegion->getCanonicalIV();
609+
if (auto *EVLIV = LoopRegion->getEVLBasedIV())
610+
IV = EVLIV;
611+
VPSingleDefRecipe *BaseIV =
612+
Builder.createDerivedIV(Kind, FPBinOp, StartV, IV, Step, "offset.idx");
611613

612614
// Truncate base induction if needed.
613615
VPTypeAnalysis TypeInfo(Plan);
@@ -2331,6 +2333,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
23312333
runPass(removeRedundantExpandSCEVRecipes, Plan);
23322334
runPass(simplifyRecipes, Plan);
23332335
runPass(removeBranchOnConst, Plan);
2336+
runPass(optimizeMasksToEVL, Plan);
23342337
runPass(removeDeadRecipes, Plan);
23352338

23362339
runPass(createAndOptimizeReplicateRegions, Plan);
@@ -2621,8 +2624,40 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
26212624
return nullptr;
26222625
}
26232626

2624-
/// Replace recipes with their EVL variants.
2625-
static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
2627+
void VPlanTransforms::optimizeMasksToEVL(VPlan &Plan) {
2628+
// Find the EVL-based header mask if it exists: icmp ult step-vector, EVL
2629+
VPInstruction *HeaderMask = nullptr;
2630+
for (VPRecipeBase &R : *Plan.getVectorLoopRegion()->getEntryBasicBlock()) {
2631+
if (match(&R, m_ICmp(m_VPInstruction<VPInstruction::StepVector>(),
2632+
m_EVL(m_VPValue())))) {
2633+
HeaderMask = cast<VPInstruction>(&R);
2634+
break;
2635+
}
2636+
}
2637+
if (!HeaderMask)
2638+
return;
2639+
2640+
VPValue *EVL = HeaderMask->getOperand(1);
2641+
2642+
VPTypeAnalysis TypeInfo(Plan);
2643+
2644+
for (VPUser *U : collectUsersRecursively(HeaderMask)) {
2645+
VPRecipeBase *R = cast<VPRecipeBase>(U);
2646+
if (auto *NewR = optimizeMaskToEVL(HeaderMask, *R, TypeInfo, *EVL)) {
2647+
NewR->insertBefore(R);
2648+
for (auto [Old, New] :
2649+
zip_equal(R->definedValues(), NewR->definedValues()))
2650+
Old->replaceAllUsesWith(New);
2651+
// Erase dead stores, the rest will be removed by removeDeadRecipes.
2652+
if (R->getNumDefinedValues() == 0)
2653+
R->eraseFromParent();
2654+
}
2655+
}
2656+
}
2657+
2658+
/// After replacing the IV with an EVL-based IV, fix up recipes that use VF to use
2659+
/// the EVL instead to avoid incorrect updates on the penultimate iteration.
2660+
static void fixupVFUsersForEVL(VPlan &Plan, VPValue &EVL) {
26262661
VPTypeAnalysis TypeInfo(Plan);
26272662
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
26282663
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
@@ -2650,10 +2685,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
26502685
return isa<VPWidenPointerInductionRecipe>(U);
26512686
});
26522687

2653-
// Defer erasing recipes till the end so that we don't invalidate the
2654-
// VPTypeAnalysis cache.
2655-
SmallVector<VPRecipeBase *> ToErase;
2656-
26572688
// Create a scalar phi to track the previous EVL if fixed-order recurrence is
26582689
// contained.
26592690
bool ContainsFORs =
@@ -2687,7 +2718,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
26872718
TypeInfo.inferScalarType(R.getVPSingleValue()), R.getDebugLoc());
26882719
VPSplice->insertBefore(&R);
26892720
R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
2690-
ToErase.push_back(&R);
26912721
}
26922722
}
26932723
}
@@ -2708,43 +2738,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
27082738
CmpInst::ICMP_ULT,
27092739
Builder.createNaryOp(VPInstruction::StepVector, {}, EVLType), &EVL);
27102740
HeaderMask->replaceAllUsesWith(EVLMask);
2711-
ToErase.push_back(HeaderMask->getDefiningRecipe());
2712-
2713-
// Try to optimize header mask recipes away to their EVL variants.
2714-
// TODO: Split optimizeMaskToEVL out and move into
2715-
// VPlanTransforms::optimize. transformRecipestoEVLRecipes should be run in
2716-
// tryToBuildVPlanWithVPRecipes beforehand.
2717-
for (VPUser *U : collectUsersRecursively(EVLMask)) {
2718-
auto *CurRecipe = cast<VPRecipeBase>(U);
2719-
VPRecipeBase *EVLRecipe =
2720-
optimizeMaskToEVL(EVLMask, *CurRecipe, TypeInfo, EVL);
2721-
if (!EVLRecipe)
2722-
continue;
2723-
2724-
unsigned NumDefVal = EVLRecipe->getNumDefinedValues();
2725-
assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
2726-
"New recipe must define the same number of values as the "
2727-
"original.");
2728-
EVLRecipe->insertBefore(CurRecipe);
2729-
if (isa<VPSingleDefRecipe, VPWidenLoadEVLRecipe, VPInterleaveEVLRecipe>(
2730-
EVLRecipe)) {
2731-
for (unsigned I = 0; I < NumDefVal; ++I) {
2732-
VPValue *CurVPV = CurRecipe->getVPValue(I);
2733-
CurVPV->replaceAllUsesWith(EVLRecipe->getVPValue(I));
2734-
}
2735-
}
2736-
ToErase.push_back(CurRecipe);
2737-
}
2738-
// Remove dead EVL mask.
2739-
if (EVLMask->getNumUsers() == 0)
2740-
ToErase.push_back(EVLMask->getDefiningRecipe());
2741-
2742-
for (VPRecipeBase *R : reverse(ToErase)) {
2743-
SmallVector<VPValue *> PossiblyDead(R->operands());
2744-
R->eraseFromParent();
2745-
for (VPValue *Op : PossiblyDead)
2746-
recursivelyDeleteDeadRecipes(Op);
2747-
}
27482741
}
27492742

27502743
/// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
@@ -2842,7 +2835,7 @@ void VPlanTransforms::addExplicitVectorLength(
28422835
DebugLoc::getCompilerGenerated(), "avl.next");
28432836
AVLPhi->addOperand(NextAVL);
28442837

2845-
transformRecipestoEVLRecipes(Plan, *VPEVL);
2838+
fixupVFUsersForEVL(Plan, *VPEVL);
28462839

28472840
// Replace all uses of VPCanonicalIVPHIRecipe by
28482841
// VPEVLBasedIVPHIRecipe except for the canonical IV increment.

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,17 @@ struct VPlanTransforms {
377377
/// users in the original exit block using the VPIRInstruction wrapping to the
378378
/// LCSSA phi.
379379
static void addExitUsersForFirstOrderRecurrences(VPlan &Plan, VFRange &Range);
380+
381+
/// If the loop is EVL tail folded, try and optimize any recipes that use an
382+
/// EVL based header mask to a VP intrinsic, e.g.:
383+
///
384+
/// %mask = icmp ult step-vector, EVL
385+
/// %load = load %ptr, %mask
386+
///
387+
/// ->
388+
///
389+
/// %load = vp.load %ptr, EVL
390+
static void optimizeMasksToEVL(VPlan &Plan);
380391
};
381392

382393
} // namespace llvm

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,12 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
317317
break;
318318
}
319319
}
320+
if (const auto *EVLPhi = dyn_cast<VPEVLBasedIVPHIRecipe>(&R)) {
321+
if (!isa<VPCanonicalIVPHIRecipe>(std::prev(EVLPhi->getIterator()))) {
322+
errs() << "EVL-based IV is not immediately after canonical IV\n";
323+
return false;
324+
}
325+
}
320326
}
321327

322328
auto *IRBB = dyn_cast<VPIRBasicBlock>(VPBB);

llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -361,12 +361,12 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s
361361
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
362362
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
363363
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
364+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[EVL_BASED_IV]], 2
364365
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
365366
; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64
366367
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP16]]
367368
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0
368369
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
369-
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[EVL_BASED_IV]], 2
370370
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
371371
; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP10]], 2
372372
; CHECK-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[TMP22]], <vscale x 8 x i1> splat (i1 true), i32 [[INTERLEAVE_EVL]])

llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
270270
; CHECK: vector.ph:
271271
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
272272
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
273+
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
273274
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <vscale x 2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
274275
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
275276
; CHECK: vector.body:
@@ -278,7 +279,6 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
278279
; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
279280
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP12]], i64 0
280281
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
281-
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
282282
; CHECK-NEXT: [[TMP15:%.*]] = icmp ult <vscale x 2 x i32> [[TMP7]], [[BROADCAST_SPLAT2]]
283283
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
284284
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]])
@@ -354,6 +354,7 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
354354
; CHECK: vector.ph:
355355
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
356356
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
357+
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
357358
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <vscale x 2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
358359
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
359360
; CHECK: vector.body:
@@ -362,7 +363,6 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
362363
; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
363364
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP12]], i64 0
364365
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
365-
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
366366
; CHECK-NEXT: [[TMP15:%.*]] = icmp ult <vscale x 2 x i32> [[TMP7]], [[BROADCAST_SPLAT2]]
367367
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
368368
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]])
@@ -576,14 +576,14 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) {
576576
; CHECK-NEXT: entry:
577577
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
578578
; CHECK: vector.ph:
579+
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
579580
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
580581
; CHECK: vector.body:
581582
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
582583
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
583584
; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true)
584585
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP12]], i64 0
585586
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
586-
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
587587
; CHECK-NEXT: [[TMP15:%.*]] = icmp ult <vscale x 16 x i32> [[TMP6]], [[BROADCAST_SPLAT]]
588588
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
589589
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP7]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP12]])

llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture rea
55
; CHECK-LABEL: add
66
; CHECK: LV(REG): VF = vscale x 4
77
; CHECK-NEXT: LV(REG): Found max usage: 2 item
8-
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
8+
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers
99
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
1010
; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
1111
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers

llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,14 @@ define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture rea
66
; ZVFH-LABEL: add
77
; ZVFH: LV(REG): VF = vscale x 4
88
; ZVFH-NEXT: LV(REG): Found max usage: 2 item
9-
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
9+
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers
1010
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
1111
; ZVFH-NEXT: LV(REG): Found invariant usage: 1 item
1212
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
1313
; ZVFHMIN-LABEL: add
1414
; ZVFHMIN: LV(REG): VF = vscale x 4
1515
; ZVFHMIN-NEXT: LV(REG): Found max usage: 2 item
16-
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
16+
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers
1717
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
1818
; ZVFHMIN-NEXT: LV(REG): Found invariant usage: 1 item
1919
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers

llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-maxbandwidth.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
define i32 @dotp(ptr %a, ptr %b) {
55
; CHECK-REGS-VP: LV(REG): VF = vscale x 16
66
; CHECK-REGS-VP-NEXT: LV(REG): Found max usage: 2 item
7-
; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
7+
; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers
88
; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 24 registers
99
; CHECK-REGS-VP-NEXT: LV(REG): Found invariant usage: 1 item
1010
; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers

0 commit comments

Comments
 (0)