
[VPlan] Materialize VF and VFxUF using VPInstructions. #152879


Merged · 9 commits · Aug 12, 2025
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7304,6 +7304,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
VPlanTransforms::materializeVectorTripCount(
BestVPlan, VectorPH, CM.foldTailByMasking(),
CM.requiresScalarEpilogue(BestVF.isVector()));
VPlanTransforms::materializeVFAndVFxUF(BestVPlan, VectorPH, BestVF);

// Perform the actual loop transformation.
VPTransformState State(&TTI, BestVF, LI, DT, ILV.AC, ILV.Builder, &BestVPlan,
@@ -7360,7 +7361,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
//===------------------------------------------------===//

// 2. Copy and widen instructions from the old loop into the new loop.
BestVPlan.prepareToExecute(State);
replaceVPBBWithIRVPBB(VectorPH, State.CFG.PrevBB);

// Move check blocks to their final position.
16 changes: 0 additions & 16 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -951,22 +951,6 @@ VPlan::~VPlan() {
delete BackedgeTakenCount;
}

void VPlan::prepareToExecute(VPTransformState &State) {
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
Type *TCTy = VPTypeAnalysis(*this).inferScalarType(getTripCount());
// FIXME: Model VF * UF computation completely in VPlan.
unsigned UF = getUF();
if (VF.getNumUsers()) {
Value *RuntimeVF = getRuntimeVF(Builder, TCTy, State.VF);
VF.setUnderlyingValue(RuntimeVF);
VFxUF.setUnderlyingValue(
UF > 1 ? Builder.CreateMul(RuntimeVF, ConstantInt::get(TCTy, UF))
: RuntimeVF);
} else {
VFxUF.setUnderlyingValue(createStepForVF(Builder, TCTy, State.VF, UF));
}
}

VPIRBasicBlock *VPlan::getExitBlock(BasicBlock *IRBB) const {
auto Iter = find_if(getExitBlocks(), [IRBB](const VPIRBasicBlock *VPIRBB) {
return VPIRBB->getIRBasicBlock() == IRBB;
5 changes: 2 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1019,6 +1019,7 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
/// The lane specifies an index into a vector formed by combining all vector
/// operands (all operands after the first one).
ExtractLane,
VScale,

};

@@ -1167,6 +1168,7 @@ class VPInstructionWithType : public VPInstruction {
switch (VPI->getOpcode()) {
case VPInstruction::WideIVStep:
case VPInstruction::StepVector:
case VPInstruction::VScale:
return true;
default:
return false;
@@ -3968,9 +3970,6 @@ class VPlan {
VPBB->setPlan(this);
}

/// Prepare the plan for execution, setting up the required live-in values.
void prepareToExecute(VPTransformState &State);

/// Generate the IR code for this VPlan.
void execute(VPTransformState *State);

11 changes: 11 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -452,6 +452,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {

switch (Opcode) {
case VPInstruction::StepVector:
case VPInstruction::VScale:
return 0;
case Instruction::Alloca:
case Instruction::ExtractValue:
@@ -1027,6 +1028,7 @@ bool VPInstruction::isSingleScalar() const {
switch (getOpcode()) {
case Instruction::PHI:
case VPInstruction::ExplicitVectorLength:
case VPInstruction::VScale:
return true;
default:
return isScalarCast();
@@ -1281,6 +1283,12 @@ void VPInstructionWithType::execute(VPTransformState &State) {
State.set(this, StepVector);
break;
}
case VPInstruction::VScale: {
Value *VScale = State.Builder.CreateVScale(ResultTy);
State.set(this, VScale, true);
break;
}

default:
llvm_unreachable("opcode not implemented yet");
}
@@ -1301,6 +1309,9 @@ void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::StepVector:
O << "step-vector " << *ResultTy;
break;
case VPInstruction::VScale:
O << "vscale " << *ResultTy;
break;
default:
assert(Instruction::isCast(getOpcode()) && "unhandled opcode");
O << Instruction::getOpcodeName(getOpcode()) << " ";
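For reference, a hedged sketch of how the new opcode surfaces (the vp<%3> name and exact dump prefix are illustrative, not taken from this patch): the print() case above renders VScale as a typed nullary VPInstruction, and execute() lowers it via IRBuilder::CreateVScale to a call to the llvm.vscale intrinsic, which is the call the test updates below check for.

    ; VPlan dump (illustrative):
    ;   EMIT vp<%3> = vscale i64
    ; Lowered IR emitted by State.Builder.CreateVScale(i64):
    %vscale = call i64 @llvm.vscale.i64()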
44 changes: 44 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3339,6 +3339,50 @@ void VPlanTransforms::materializeVectorTripCount(VPlan &Plan,
VectorTC.replaceAllUsesWith(Res);
}

void VPlanTransforms::materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
ElementCount VFEC) {
VPBuilder Builder(VectorPH, VectorPH->begin());
auto *TCTy = VPTypeAnalysis(Plan).inferScalarType(Plan.getTripCount());
VPValue &VF = Plan.getVF();
VPValue &VFxUF = Plan.getVFxUF();
Comment on lines +3343 to +3344

Collaborator:

So Plan's getVF() and getVFxUF() become obsolete from this point? They will remain use-less, no longer retrieving the relevant values. Worth noting, or hooking them to their replacements?

Contributor Author:

Yep, for now there are no users of getVF and getVFxUF after this point. It would be good to mark them as such, but I'm not sure what the best way would be. Another alternative would be to manage them as actual users. They should also be region-specific. I'll prepare some follow-ups for that.

if (VF.getNumUsers()) {
Collaborator:

Deal with the simpler complementary case first? Add a comment on why this is needed (the distinct orders are explained below). The VFxUF handling could also be guarded on it having users.

Contributor Author:

Added comments, thanks! I've not added a use check for now, as this will easily be cleaned up by VPlan DCE.

VPValue *RuntimeVF =
Plan.getOrAddLiveIn(ConstantInt::get(TCTy, VFEC.getKnownMinValue()));
if (VFEC.isScalable())
RuntimeVF = Builder.createNaryOp(
Instruction::Mul,
{Builder.createNaryOp(VPInstruction::VScale, {}, TCTy), RuntimeVF},
VPIRFlags::WrapFlagsTy(true, false));
if (any_of(VF.users(), [&VF](VPUser *U) { return !U->usesScalars(&VF); })) {
auto *BC = Builder.createNaryOp(VPInstruction::Broadcast, {RuntimeVF});
VF.replaceUsesWithIf(
BC, [&VF](VPUser &U, unsigned) { return !U.usesScalars(&VF); });
}
VF.replaceAllUsesWith(RuntimeVF);
Contributor:

How does this work, given you're undoing the VF.replaceUsesWithIf call above?

Contributor Author:

Vector uses will be replaced with the broadcast above; the remaining users get the scalar value.

Contributor:

Oh I see now, sorry! By the time we get to the second line the uses of VF that were replaced before are no longer present in the use list.
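To make the mechanics concrete, here is a minimal standalone C++ model of the two-step replacement (a sketch, not LLVM code; the Value/User classes and all names are invented for illustration): replaceUsesWithIf moves the matching users off VF's use list, so the following replaceAllUsesWith only sees the users that were left behind.

    // Minimal model (not LLVM code) of replaceUsesWithIf + replaceAllUsesWith.
    #include <functional>
    #include <iostream>
    #include <string>
    #include <vector>

    struct Value;

    struct User {
      std::string Name;
      bool UsesScalars = false; // analogous to VPUser::usesScalars(&VF)
      Value *Operand = nullptr; // the single operand this user reads
    };

    struct Value {
      std::string Name;
      std::vector<User *> Users;

      // Redirect every user matching Pred to New; those users leave this
      // value's use list, mirroring the behavior of replaceUsesWithIf.
      void replaceUsesWithIf(Value *New,
                             const std::function<bool(User &)> &Pred) {
        std::vector<User *> Remaining;
        for (User *U : Users) {
          if (Pred(*U)) {
            U->Operand = New;
            New->Users.push_back(U);
          } else {
            Remaining.push_back(U);
          }
        }
        Users = std::move(Remaining);
      }

      void replaceAllUsesWith(Value *New) {
        replaceUsesWithIf(New, [](User &) { return true; });
      }
    };

    int main() {
      Value VF{"VF"}, Broadcast{"broadcast"}, RuntimeVF{"runtime-vf"};
      User VecUser{"vector-user", /*UsesScalars=*/false};
      User ScalarUser{"scalar-user", /*UsesScalars=*/true};
      VecUser.Operand = ScalarUser.Operand = &VF;
      VF.Users = {&VecUser, &ScalarUser};

      // Step 1: only users of the vector form move to the broadcast.
      VF.replaceUsesWithIf(&Broadcast, [](User &U) { return !U.UsesScalars; });
      // Step 2: the users still on VF's list (scalar users) get the scalar.
      VF.replaceAllUsesWith(&RuntimeVF);

      std::cout << VecUser.Name << " -> " << VecUser.Operand->Name << "\n";
      std::cout << ScalarUser.Name << " -> " << ScalarUser.Operand->Name << "\n";
      // Prints: vector-user -> broadcast, scalar-user -> runtime-vf
    }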


VPValue *UF = Plan.getOrAddLiveIn(ConstantInt::get(TCTy, Plan.getUF()));
auto *MulByUF = Plan.getUF() == 1 ? RuntimeVF
Contributor:

Can you not just fall through to the bottom of the function? It looks like we're just duplicating code here.

Contributor Author:

I'm not sure how; if VF is used, we first multiply VF * vscale and then multiply that by UF, whereas below we compute (VF * UF) * vscale.

Contributor:

Perhaps I'm missing something, but it looked like these two pieces of code were calculating the same thing:

    VPValue *UF = Plan.getOrAddLiveIn(ConstantInt::get(TCTy, Plan.getUF()));
    VPValue *MulByUF =
        Plan.getUF() == 1
            ? RuntimeVF
            : Builder.createNaryOp(Instruction::Mul, {RuntimeVF, UF});
  }

and

  VPValue *RuntimeVFxUF = Builder.createElementCount(TCTy, VFEC * Plan.getUF());

The only difference seems to be that you're not using createOverflowingOp when multiplying the UF, whereas Builder.createElementCount(TCTy, VFEC * Plan.getUF()) does.

Contributor Author:

Hm, but they generate the operations in a slightly different order: the first does (VF * vscale) * UF, whereas the other does (VF * UF) * vscale; the former will serve users of the runtime VF.

Both compute the same value, just in a different order. The patch keeps the instruction sequence the same as the existing IR generation.

Contributor:

OK, but I'm a bit surprised that the order of multiplication is crucial for correctness here. At the moment it just looks like the code is deliberately trying hard to maintain the order of multiplication, whereas in reality, if the order matters, then something is horribly broken, right? I would expect the vectoriser to always choose VF, UF pairs such that (VF * VScale) * UF == (VF * UF) * VScale. If not, then I believe that we need to fix the vectoriser to ensure this cannot happen.

Contributor Author:

It's not needed for correctness; as mentioned above, they both compute the same value.

But if there are no users of the runtime VF, the current code constant-folds the UF * VF.getKnownMinValue() computation, which is not possible if there are users of VF.getKnownMinValue() * vscale.

One could argue that we could always create VF * vscale * UF and rely on recipe simplifications to fold the VF * UF computation if there are no users of VF * vscale.

This would certainly be possible once we have the computation explicit in VPlan, but the current patch just tries to port the existing computation, to avoid unrelated test changes.

Contributor:

OK, fair enough, thanks for the explanation. I do get the argument that you want to minimise test changes, and I'm happy with that. I just wanted to understand the rationale behind it, that's all, and make sure there isn't a hidden bug or assumption here.

: Builder.createNaryOp(Instruction::Mul,
{RuntimeVF, UF});
VFxUF.replaceAllUsesWith(MulByUF);
return;
}

unsigned VFMulUF = VFEC.getKnownMinValue() * Plan.getUF();
VPValue *RuntimeVFxUF = Plan.getOrAddLiveIn(ConstantInt::get(TCTy, VFMulUF));
if (VFEC.isScalable()) {
RuntimeVFxUF =
VFMulUF == 1
? RuntimeVFxUF
: Builder.createNaryOp(
Instruction::Mul,
{Builder.createNaryOp(VPInstruction::VScale, {}, TCTy),
RuntimeVFxUF},
VPIRFlags::WrapFlagsTy(true, false));
}
VFxUF.replaceAllUsesWith(RuntimeVFxUF);
}
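To illustrate the two emission orders discussed in the thread above, a hedged LLVM IR sketch (register names invented; assumes VF = vscale x 4, UF = 2, and an i64 trip count):

    ; VF has users: emit VF * vscale first so those users can share it,
    ; then multiply by UF to get VFxUF.
    %vscale     = call i64 @llvm.vscale.i64()
    %runtime.vf = mul nuw i64 %vscale, 4     ; VF * vscale
    %vf.x.uf    = mul i64 %runtime.vf, 2     ; (VF * vscale) * UF

    ; VF has no users: VF * UF constant-folds to 8 before the single
    ; vscale multiply.
    %vf.x.uf.2  = mul nuw i64 %vscale, 8     ; (VF * UF) * vscale

Both sequences compute the same value; keeping them distinct preserves the instruction order of the pre-existing IR generation and avoids unrelated test churn.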

/// Returns true if \p V is VPWidenLoadRecipe or VPInterleaveRecipe that can be
/// converted to a narrower recipe. \p V is used by a wide recipe that feeds a
/// store interleave group at index \p Idx, \p WideMember0 is the recipe feeding
4 changes: 4 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -276,6 +276,10 @@ struct VPlanTransforms {
static void materializeBackedgeTakenCount(VPlan &Plan,
VPBasicBlock *VectorPH);

/// Materialize VF and VFxUF to be computed explicitly using VPInstructions.
static void materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
ElementCount VF);

/// Try to convert a plan with interleave groups with VF elements to a plan
/// with the interleave groups replaced by wide loads and stores processing VF
/// elements, if all transformed interleave groups access the full vector
@@ -14,12 +14,12 @@ define void @induction_i7(ptr %dst) #0 {
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP4]], 2
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP40]], i64 0
; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP5]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i7
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP40]], i64 0
; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLAT:%.*]] = trunc <vscale x 2 x i64> [[DOTSPLAT_]] to <vscale x 2 x i7>
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i8> @llvm.stepvector.nxv2i8()
; CHECK-NEXT: [[TMP7:%.*]] = trunc <vscale x 2 x i8> [[TMP6]] to <vscale x 2 x i7>
@@ -76,12 +76,12 @@ define void @induction_i3_zext(ptr %dst) #0 {
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP4]], 2
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP40]], i64 0
; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP5]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i3
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP40]], i64 0
; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLAT:%.*]] = trunc <vscale x 2 x i64> [[DOTSPLAT_]] to <vscale x 2 x i3>
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i8> @llvm.stepvector.nxv2i8()
; CHECK-NEXT: [[TMP7:%.*]] = trunc <vscale x 2 x i8> [[TMP6]] to <vscale x 2 x i3>
@@ -72,7 +72,6 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK: VPlan 'Final VPlan for VF={8,16},UF={1}' {
; CHECK-NEXT: Live-in ir<[[EP_VFxUF:.+]]> = VF * UF
; CHECK-NEXT: Live-in ir<1024> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
@@ -12,9 +12,6 @@ define i64 @pr97452_scalable_vf1_for(ptr %src, ptr noalias %dst) #0 {
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 23, [[TMP0]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 23, [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 23, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 1
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 0, i32 [[TMP4]]
@@ -27,21 +24,20 @@ define i64 @pr97452_scalable_vf1_for(ptr %src, ptr noalias %dst) #0 {
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 1 x i64> @llvm.vector.splice.nxv1i64(<vscale x 1 x i64> [[VECTOR_RECUR]], <vscale x 1 x i64> [[WIDE_LOAD]], i32 -1)
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT: store <vscale x 1 x i64> [[TMP7]], ptr [[TMP8]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 23
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP12:%.*]] = sub i32 [[TMP11]], 1
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 1 x i64> [[WIDE_LOAD]], i32 [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP15:%.*]] = sub i32 [[TMP14]], 1
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <vscale x 1 x i64> [[TMP7]], i32 [[TMP15]]
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 23, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 23, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[FOR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ]
@@ -1012,7 +1012,6 @@ define void @load_store_factor5(ptr %p) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1155,7 +1154,6 @@ define void @load_store_factor5(ptr %p) {
; SCALABLE-NEXT: entry:
; SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1273,7 +1271,6 @@ define void @load_store_factor6(ptr %p) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1431,7 +1428,6 @@ define void @load_store_factor6(ptr %p) {
; SCALABLE-NEXT: entry:
; SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1562,7 +1558,6 @@ define void @load_store_factor7(ptr %p) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1736,7 +1731,6 @@ define void @load_store_factor7(ptr %p) {
; SCALABLE-NEXT: entry:
; SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1880,7 +1874,6 @@ define void @load_store_factor8(ptr %p) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -2067,7 +2060,6 @@ define void @load_store_factor8(ptr %p) {
; SCALABLE-NEXT: entry:
; SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALABLE: vector.ph:
; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
14 changes: 5 additions & 9 deletions llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll
@@ -12,24 +12,20 @@ define void @load_store(ptr %p) {
; LMUL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; LMUL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; LMUL1: vector.ph:
; LMUL1-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; LMUL1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; LMUL1-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; LMUL1-NEXT: br label [[VECTOR_BODY:%.*]]
; LMUL1: vector.body:
; LMUL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; LMUL1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[INDEX]]
; LMUL1-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP3]], align 8
; LMUL1-NEXT: [[TMP5:%.*]] = add <vscale x 1 x i64> [[WIDE_LOAD]], splat (i64 1)
; LMUL1-NEXT: store <vscale x 1 x i64> [[TMP5]], ptr [[TMP3]], align 8
; LMUL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
; LMUL1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; LMUL1-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; LMUL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
; LMUL1-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; LMUL1-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; LMUL1: middle.block:
; LMUL1-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; LMUL1-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; LMUL1-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; LMUL1: scalar.ph:
; LMUL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; LMUL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; LMUL1-NEXT: br label [[FOR_BODY:%.*]]
; LMUL1: for.body:
; LMUL1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
@@ -438,7 +438,6 @@ define void @no_high_lmul_or_interleave(ptr %p) {
; IF-EVL-NEXT: entry:
; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL: vector.ph:
; IF-EVL-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]