Skip to content

Commit 9f1c2e3

Browse files
committed
[VPlan] Move addExplicitVectorLength to tryToBuildVPlanWithVPRecipes
Stacked on #166158. Currently we convert a VPlan to an EVL tail folded one after the VPlan is built and optimized, which doesn't match how we handle regular tail folding. This addresses a long-standing TODO by performing it much earlier in the pipeline before any optimizations are run, and simultaneously splits out optimizeMaskToEVL into a separate pass to be run during VPlanTransforms::optimize. This way the two parts of EVL tail folding are separated into those needed for correctness and those that are an optimization. - We don't need to remove the old recipes ourselves anymore and can leave it to removeDeadRecipes - createScalarIVSteps needs to be updated to use the EVL based IV if it exists, so a helper method was added to VPlan to extract it - VPlanVerifier was updated to check that the EVL based IV always immediately follows the canonical IV. Because we now optimize the VPlan after the EVL stuff is added, some simplifications e.g. replacing a scalar-steps when UF=1 kick in for the initial VPlan. Fixes #153144
1 parent 8fc3b21 commit 9f1c2e3

18 files changed

+120
-119
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8216,10 +8216,6 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
82168216
VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths,
82178217
*Plan, CM.getMinimalBitwidths());
82188218
VPlanTransforms::runPass(VPlanTransforms::optimize, *Plan);
8219-
// TODO: try to put it close to addActiveLaneMask().
8220-
if (CM.foldTailWithEVL())
8221-
VPlanTransforms::runPass(VPlanTransforms::addExplicitVectorLength,
8222-
*Plan, CM.getMaxSafeElements());
82238219
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
82248220
VPlans.push_back(std::move(Plan));
82258221
}
@@ -8483,6 +8479,9 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
84838479
}
84848480
VPlanTransforms::optimizeInductionExitUsers(*Plan, IVEndValues, *PSE.getSE());
84858481

8482+
if (CM.foldTailWithEVL())
8483+
VPlanTransforms::addExplicitVectorLength(*Plan, CM.getMaxSafeElements());
8484+
84868485
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
84878486
return Plan;
84888487
}

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4118,6 +4118,11 @@ class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
41184118
return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
41194119
}
41204120

4121+
VPEVLBasedIVPHIRecipe *getEVLBasedIV() {
4122+
return dyn_cast<VPEVLBasedIVPHIRecipe>(
4123+
std::next(getCanonicalIV()->getIterator()));
4124+
}
4125+
41214126
/// Return the type of the canonical IV for loop regions.
41224127
Type *getCanonicalIVType() { return getCanonicalIV()->getScalarType(); }
41234128
const Type *getCanonicalIVType() const {

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 41 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -610,9 +610,11 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
610610
VPBuilder &Builder) {
611611
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
612612
VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
613-
VPCanonicalIVPHIRecipe *CanonicalIV = LoopRegion->getCanonicalIV();
614-
VPSingleDefRecipe *BaseIV = Builder.createDerivedIV(
615-
Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
613+
VPValue *IV = LoopRegion->getCanonicalIV();
614+
if (auto *EVLIV = LoopRegion->getEVLBasedIV())
615+
IV = EVLIV;
616+
VPSingleDefRecipe *BaseIV =
617+
Builder.createDerivedIV(Kind, FPBinOp, StartV, IV, Step, "offset.idx");
616618

617619
// Truncate base induction if needed.
618620
VPTypeAnalysis TypeInfo(Plan);
@@ -2327,6 +2329,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
23272329
runPass(removeRedundantExpandSCEVRecipes, Plan);
23282330
runPass(simplifyRecipes, Plan);
23292331
runPass(removeBranchOnConst, Plan);
2332+
runPass(optimizeMasksToEVL, Plan);
23302333
runPass(removeDeadRecipes, Plan);
23312334

23322335
runPass(createAndOptimizeReplicateRegions, Plan);
@@ -2617,8 +2620,40 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
26172620
return nullptr;
26182621
}
26192622

2620-
/// Replace recipes with their EVL variants.
2621-
static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
2623+
void VPlanTransforms::optimizeMasksToEVL(VPlan &Plan) {
2624+
// Find the EVL-based header mask if it exists: icmp ult step-vector, EVL
2625+
VPInstruction *HeaderMask = nullptr;
2626+
for (VPRecipeBase &R : *Plan.getVectorLoopRegion()->getEntryBasicBlock()) {
2627+
if (match(&R, m_ICmp(m_VPInstruction<VPInstruction::StepVector>(),
2628+
m_EVL(m_VPValue())))) {
2629+
HeaderMask = cast<VPInstruction>(&R);
2630+
break;
2631+
}
2632+
}
2633+
if (!HeaderMask)
2634+
return;
2635+
2636+
VPValue *EVL = HeaderMask->getOperand(1);
2637+
2638+
VPTypeAnalysis TypeInfo(Plan);
2639+
2640+
for (VPUser *U : collectUsersRecursively(HeaderMask)) {
2641+
VPRecipeBase *R = cast<VPRecipeBase>(U);
2642+
if (auto *NewR = optimizeMaskToEVL(HeaderMask, *R, TypeInfo, *EVL)) {
2643+
NewR->insertBefore(R);
2644+
for (auto [Old, New] :
2645+
zip_equal(R->definedValues(), NewR->definedValues()))
2646+
Old->replaceAllUsesWith(New);
2647+
// Erase dead stores, the rest will be removed by removeDeadRecipes.
2648+
if (R->getNumDefinedValues() == 0)
2649+
R->eraseFromParent();
2650+
}
2651+
}
2652+
}
2653+
2654+
/// After replacing the IV with a EVL-based IV, fixup recipes that use VF to use
2655+
/// the EVL instead to avoid incorrect updates on the penultimate iteration.
2656+
static void fixupVFUsersForEVL(VPlan &Plan, VPValue &EVL) {
26222657
VPTypeAnalysis TypeInfo(Plan);
26232658
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
26242659
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
@@ -2646,10 +2681,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
26462681
return isa<VPWidenPointerInductionRecipe>(U);
26472682
});
26482683

2649-
// Defer erasing recipes till the end so that we don't invalidate the
2650-
// VPTypeAnalysis cache.
2651-
SmallVector<VPRecipeBase *> ToErase;
2652-
26532684
// Create a scalar phi to track the previous EVL if fixed-order recurrence is
26542685
// contained.
26552686
bool ContainsFORs =
@@ -2683,7 +2714,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
26832714
TypeInfo.inferScalarType(R.getVPSingleValue()), R.getDebugLoc());
26842715
VPSplice->insertBefore(&R);
26852716
R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
2686-
ToErase.push_back(&R);
26872717
}
26882718
}
26892719
}
@@ -2704,43 +2734,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
27042734
CmpInst::ICMP_ULT,
27052735
Builder.createNaryOp(VPInstruction::StepVector, {}, EVLType), &EVL);
27062736
HeaderMask->replaceAllUsesWith(EVLMask);
2707-
ToErase.push_back(HeaderMask->getDefiningRecipe());
2708-
2709-
// Try to optimize header mask recipes away to their EVL variants.
2710-
// TODO: Split optimizeMaskToEVL out and move into
2711-
// VPlanTransforms::optimize. transformRecipestoEVLRecipes should be run in
2712-
// tryToBuildVPlanWithVPRecipes beforehand.
2713-
for (VPUser *U : collectUsersRecursively(EVLMask)) {
2714-
auto *CurRecipe = cast<VPRecipeBase>(U);
2715-
VPRecipeBase *EVLRecipe =
2716-
optimizeMaskToEVL(EVLMask, *CurRecipe, TypeInfo, EVL);
2717-
if (!EVLRecipe)
2718-
continue;
2719-
2720-
unsigned NumDefVal = EVLRecipe->getNumDefinedValues();
2721-
assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
2722-
"New recipe must define the same number of values as the "
2723-
"original.");
2724-
EVLRecipe->insertBefore(CurRecipe);
2725-
if (isa<VPSingleDefRecipe, VPWidenLoadEVLRecipe, VPInterleaveEVLRecipe>(
2726-
EVLRecipe)) {
2727-
for (unsigned I = 0; I < NumDefVal; ++I) {
2728-
VPValue *CurVPV = CurRecipe->getVPValue(I);
2729-
CurVPV->replaceAllUsesWith(EVLRecipe->getVPValue(I));
2730-
}
2731-
}
2732-
ToErase.push_back(CurRecipe);
2733-
}
2734-
// Remove dead EVL mask.
2735-
if (EVLMask->getNumUsers() == 0)
2736-
ToErase.push_back(EVLMask->getDefiningRecipe());
2737-
2738-
for (VPRecipeBase *R : reverse(ToErase)) {
2739-
SmallVector<VPValue *> PossiblyDead(R->operands());
2740-
R->eraseFromParent();
2741-
for (VPValue *Op : PossiblyDead)
2742-
recursivelyDeleteDeadRecipes(Op);
2743-
}
27442737
}
27452738

27462739
/// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
@@ -2838,7 +2831,7 @@ void VPlanTransforms::addExplicitVectorLength(
28382831
DebugLoc::getCompilerGenerated(), "avl.next");
28392832
AVLPhi->addOperand(NextAVL);
28402833

2841-
transformRecipestoEVLRecipes(Plan, *VPEVL);
2834+
fixupVFUsersForEVL(Plan, *VPEVL);
28422835

28432836
// Replace all uses of VPCanonicalIVPHIRecipe by
28442837
// VPEVLBasedIVPHIRecipe except for the canonical IV increment.

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,17 @@ struct VPlanTransforms {
377377
/// users in the original exit block using the VPIRInstruction wrapping to the
378378
/// LCSSA phi.
379379
static void addExitUsersForFirstOrderRecurrences(VPlan &Plan, VFRange &Range);
380+
381+
/// If the loop is EVL tail folded, try and optimize any recipes that use a
382+
/// EVL based header mask to a VP intrinsic, e.g:
383+
///
384+
/// %mask = icmp step-vector, EVL
385+
/// %load = load %ptr, %mask
386+
///
387+
/// ->
388+
///
389+
/// %load = vp.load %ptr, EVL
390+
static void optimizeMasksToEVL(VPlan &Plan);
380391
};
381392

382393
} // namespace llvm

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,12 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
310310
break;
311311
}
312312
}
313+
if (const auto *EVLPhi = dyn_cast<VPEVLBasedIVPHIRecipe>(&R)) {
314+
if (!isa<VPCanonicalIVPHIRecipe>(std::prev(EVLPhi->getIterator()))) {
315+
errs() << "EVL-based IV is not immediately after canonical IV\n";
316+
return false;
317+
}
318+
}
313319
}
314320

315321
auto *IRBB = dyn_cast<VPIRBasicBlock>(VPBB);

llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -361,12 +361,12 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s
361361
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
362362
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
363363
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
364+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[EVL_BASED_IV]], 2
364365
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
365366
; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64
366367
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP16]]
367368
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0
368369
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
369-
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[EVL_BASED_IV]], 2
370370
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
371371
; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP10]], 2
372372
; CHECK-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[TMP22]], <vscale x 8 x i1> splat (i1 true), i32 [[INTERLEAVE_EVL]])

llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
270270
; CHECK: vector.ph:
271271
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
272272
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
273+
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
273274
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <vscale x 2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
274275
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
275276
; CHECK: vector.body:
@@ -278,7 +279,6 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
278279
; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
279280
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP12]], i64 0
280281
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
281-
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
282282
; CHECK-NEXT: [[TMP15:%.*]] = icmp ult <vscale x 2 x i32> [[TMP7]], [[BROADCAST_SPLAT2]]
283283
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
284284
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]])
@@ -351,6 +351,7 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
351351
; CHECK: vector.ph:
352352
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
353353
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
354+
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
354355
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <vscale x 2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
355356
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
356357
; CHECK: vector.body:
@@ -359,7 +360,6 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
359360
; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
360361
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP12]], i64 0
361362
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
362-
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
363363
; CHECK-NEXT: [[TMP15:%.*]] = icmp ult <vscale x 2 x i32> [[TMP7]], [[BROADCAST_SPLAT2]]
364364
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
365365
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]])
@@ -570,14 +570,14 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) {
570570
; CHECK-NEXT: entry:
571571
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
572572
; CHECK: vector.ph:
573+
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
573574
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
574575
; CHECK: vector.body:
575576
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
576577
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
577578
; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true)
578579
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP12]], i64 0
579580
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
580-
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
581581
; CHECK-NEXT: [[TMP15:%.*]] = icmp ult <vscale x 16 x i32> [[TMP6]], [[BROADCAST_SPLAT]]
582582
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
583583
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP7]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP12]])

llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture rea
55
; CHECK-LABEL: add
66
; CHECK: LV(REG): VF = vscale x 4
77
; CHECK-NEXT: LV(REG): Found max usage: 2 item
8-
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
8+
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers
99
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
1010
; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
1111
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers

llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,14 @@ define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture rea
66
; ZVFH-LABEL: add
77
; ZVFH: LV(REG): VF = vscale x 4
88
; ZVFH-NEXT: LV(REG): Found max usage: 2 item
9-
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
9+
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers
1010
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
1111
; ZVFH-NEXT: LV(REG): Found invariant usage: 1 item
1212
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
1313
; ZVFHMIN-LABEL: add
1414
; ZVFHMIN: LV(REG): VF = vscale x 4
1515
; ZVFHMIN-NEXT: LV(REG): Found max usage: 2 item
16-
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
16+
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers
1717
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
1818
; ZVFHMIN-NEXT: LV(REG): Found invariant usage: 1 item
1919
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers

llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-maxbandwidth.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
define i32 @dotp(ptr %a, ptr %b) {
55
; CHECK-REGS-VP: LV(REG): VF = vscale x 16
66
; CHECK-REGS-VP-NEXT: LV(REG): Found max usage: 2 item
7-
; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
7+
; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers
88
; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 24 registers
99
; CHECK-REGS-VP-NEXT: LV(REG): Found invariant usage: 1 item
1010
; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers

0 commit comments

Comments
 (0)