
Commit 82d633e

[VPlan] Materialize vector trip count using VPInstructions. (#151925)
Materialize the vector trip count computation using VPInstruction instead of directly creating IR. This is one of the last few steps needed to model the full vector skeleton in VPlan. It also simplifies vector-trip count computations for scalable vectors, as we can re-use the UF x VF computation. PR: #151925
Parent: 9349484
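
The computation this commit moves into VPlan is easy to model in plain arithmetic. Below is a minimal scalar sketch, not LLVM code: the function name vectorTripCount is hypothetical, and Step stands for the VF * UF quantity the patch reuses via the VFxUF live-in. The comments mirror the n.rnd.up / n.mod.vf / n.vec value names in the diff.

// Minimal scalar model of the vector trip-count computation; illustrative
// only, under the assumptions stated above.
#include <cassert>
#include <cstdint>

uint64_t vectorTripCount(uint64_t N, uint64_t Step, bool TailByMasking,
                         bool RequiresScalarEpilogue) {
  assert(!(TailByMasking && RequiresScalarEpilogue));
  if (TailByMasking)
    N += Step - 1;       // n.rnd.up: round N up to a multiple of Step
  uint64_t R = N % Step; // n.mod.vf: iterations left for the scalar remainder
  if (RequiresScalarEpilogue && R == 0)
    R = Step;            // keep at least one full scalar epilogue run
  return N - R;          // n.vec: iterations executed by the vector loop
}

int main() {
  assert(vectorTripCount(100, 8, false, false) == 96); // 4 scalar leftovers
  assert(vectorTripCount(100, 8, true, false) == 104); // rounded up, masked
  assert(vectorTripCount(96, 8, false, true) == 88);   // epilogue forced
  return 0;
}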

File tree

138 files changed: 1601 additions, 4098 deletions

Note: this is a large commit; only a subset of the 138 changed files is shown below.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 62 deletions
@@ -548,9 +548,6 @@ class InnerLoopVectorizer {
 protected:
   friend class LoopVectorizationPlanner;
 
-  /// Returns (and creates if needed) the trip count of the widened loop.
-  Value *getOrCreateVectorTripCount(BasicBlock *InsertBlock);
-
   // Create a check to see if the vector loop should be executed
   Value *createIterationCountCheck(ElementCount VF, unsigned UF) const;
 
@@ -2272,56 +2269,6 @@ static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) {
   return TTI.enableMaskedInterleavedAccessVectorization();
 }
 
-Value *
-InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
-  if (VectorTripCount)
-    return VectorTripCount;
-
-  Value *TC = getTripCount();
-  IRBuilder<> Builder(InsertBlock->getTerminator());
-
-  Type *Ty = TC->getType();
-  // This is where we can make the step a runtime constant.
-  Value *Step = createStepForVF(Builder, Ty, VF, UF);
-
-  // If the tail is to be folded by masking, round the number of iterations N
-  // up to a multiple of Step instead of rounding down. This is done by first
-  // adding Step-1 and then rounding down. Note that it's ok if this addition
-  // overflows: the vector induction variable will eventually wrap to zero given
-  // that it starts at zero and its Step is a power of two; the loop will then
-  // exit, with the last early-exit vector comparison also producing all-true.
-  // For scalable vectors the VF is not guaranteed to be a power of 2, but this
-  // is accounted for in emitIterationCountCheck that adds an overflow check.
-  if (Cost->foldTailByMasking()) {
-    assert(isPowerOf2_32(VF.getKnownMinValue() * UF) &&
-           "VF*UF must be a power of 2 when folding tail by masking");
-    TC = Builder.CreateAdd(TC, Builder.CreateSub(Step, ConstantInt::get(Ty, 1)),
-                           "n.rnd.up");
-  }
-
-  // Now we need to generate the expression for the part of the loop that the
-  // vectorized body will execute. This is equal to N - (N % Step) if scalar
-  // iterations are not required for correctness, or N - Step, otherwise. Step
-  // is equal to the vectorization factor (number of SIMD elements) times the
-  // unroll factor (number of SIMD instructions).
-  Value *R = Builder.CreateURem(TC, Step, "n.mod.vf");
-
-  // There are cases where we *must* run at least one iteration in the remainder
-  // loop. See the cost model for when this can happen. If the step evenly
-  // divides the trip count, we set the remainder to be equal to the step. If
-  // the step does not evenly divide the trip count, no adjustment is necessary
-  // since there will already be scalar iterations. Note that the minimum
-  // iterations check ensures that N >= Step.
-  if (Cost->requiresScalarEpilogue(VF.isVector())) {
-    auto *IsZero = Builder.CreateICmpEQ(R, ConstantInt::get(R->getType(), 0));
-    R = Builder.CreateSelect(IsZero, Step, R);
-  }
-
-  VectorTripCount = Builder.CreateSub(TC, R, "n.vec");
-
-  return VectorTripCount;
-}
-
 void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) {
   // Note: The block with the minimum trip-count check is already connected
   // during earlier VPlan construction.
@@ -7354,6 +7301,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
   // Canonicalize EVL loops after regions are dissolved.
   VPlanTransforms::canonicalizeEVLLoops(BestVPlan);
   VPlanTransforms::materializeBackedgeTakenCount(BestVPlan, VectorPH);
+  VPlanTransforms::materializeVectorTripCount(
+      BestVPlan, VectorPH, CM.foldTailByMasking(),
+      CM.requiresScalarEpilogue(BestVF.isVector()));
 
   // Perform the actual loop transformation.
   VPTransformState State(&TTI, BestVF, LI, DT, ILV.AC, ILV.Builder, &BestVPlan,
@@ -7410,8 +7360,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
   //===------------------------------------------------===//
 
   // 2. Copy and widen instructions from the old loop into the new loop.
-  BestVPlan.prepareToExecute(
-      ILV.getOrCreateVectorTripCount(ILV.LoopVectorPreHeader), State);
+  BestVPlan.prepareToExecute(State);
   replaceVPBBWithIRVPBB(VectorPH, State.CFG.PrevBB);
 
   // Move check blocks to their final position.
@@ -9407,13 +9356,6 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
       State.Builder, Index, getStartValue()->getLiveInIRValue(), Step, Kind,
       cast_if_present<BinaryOperator>(FPBinOp));
   DerivedIV->setName(Name);
-  // If index is the vector trip count, the concrete value will only be set in
-  // prepareToExecute, leading to missed simplifications, e.g. if it is 0.
-  // TODO: Remove the special case for the vector trip count once it is computed
-  // in VPlan and can be used during VPlan simplification.
-  assert((DerivedIV != Index ||
-          getOperand(1) == &getParent()->getPlan()->getVectorTripCount()) &&
-         "IV didn't need transforming?");
   State.set(this, DerivedIV, VPLane(0));
 }
 

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 2 additions & 8 deletions
@@ -951,15 +951,9 @@ VPlan::~VPlan() {
     delete BackedgeTakenCount;
 }
 
-void VPlan::prepareToExecute(Value *VectorTripCountV, VPTransformState &State) {
-  if (!VectorTripCount.getUnderlyingValue())
-    VectorTripCount.setUnderlyingValue(VectorTripCountV);
-  else
-    assert(VectorTripCount.getUnderlyingValue() == VectorTripCountV &&
-           "VectorTripCount set earlier must much VectorTripCountV");
-
+void VPlan::prepareToExecute(VPTransformState &State) {
   IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
-  Type *TCTy = VectorTripCountV->getType();
+  Type *TCTy = VPTypeAnalysis(*this).inferScalarType(getTripCount());
   // FIXME: Model VF * UF computation completely in VPlan.
   unsigned UF = getUF();
   if (VF.getNumUsers()) {

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 1 addition & 1 deletion
@@ -3969,7 +3969,7 @@ class VPlan {
   }
 
   /// Prepare the plan for execution, setting up the required live-in values.
-  void prepareToExecute(Value *VectorTripCount, VPTransformState &State);
+  void prepareToExecute(VPTransformState &State);
 
   /// Generate the IR code for this VPlan.
   void execute(VPTransformState *State);

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 61 additions & 0 deletions
@@ -3278,6 +3278,67 @@ void VPlanTransforms::materializeBackedgeTakenCount(VPlan &Plan,
   BTC->replaceAllUsesWith(TCMO);
 }
 
+void VPlanTransforms::materializeVectorTripCount(VPlan &Plan,
+                                                 VPBasicBlock *VectorPHVPBB,
+                                                 bool TailByMasking,
+                                                 bool RequiresScalarEpilogue) {
+  VPValue &VectorTC = Plan.getVectorTripCount();
+  assert(VectorTC.isLiveIn() && "vector-trip-count must be a live-in");
+  // There's nothing to do if there are no users of the vector trip count or its
+  // IR value has already been set.
+  if (VectorTC.getNumUsers() == 0 || VectorTC.getLiveInIRValue())
+    return;
+
+  VPValue *TC = Plan.getTripCount();
+  Type *TCTy = VPTypeAnalysis(Plan).inferScalarType(TC);
+  VPBuilder Builder(VectorPHVPBB, VectorPHVPBB->begin());
+  VPValue *Step = &Plan.getVFxUF();
+
+  // If the tail is to be folded by masking, round the number of iterations N
+  // up to a multiple of Step instead of rounding down. This is done by first
+  // adding Step-1 and then rounding down. Note that it's ok if this addition
+  // overflows: the vector induction variable will eventually wrap to zero given
+  // that it starts at zero and its Step is a power of two; the loop will then
+  // exit, with the last early-exit vector comparison also producing all-true.
+  // For scalable vectors the VF is not guaranteed to be a power of 2, but this
+  // is accounted for in emitIterationCountCheck that adds an overflow check.
+  if (TailByMasking) {
+    TC = Builder.createNaryOp(
+        Instruction::Add,
+        {TC, Builder.createNaryOp(
+                 Instruction::Sub,
+                 {Step, Plan.getOrAddLiveIn(ConstantInt::get(TCTy, 1))})},
+        DebugLoc::getCompilerGenerated(), "n.rnd.up");
+  }
+
+  // Now we need to generate the expression for the part of the loop that the
+  // vectorized body will execute. This is equal to N - (N % Step) if scalar
+  // iterations are not required for correctness, or N - Step, otherwise. Step
+  // is equal to the vectorization factor (number of SIMD elements) times the
+  // unroll factor (number of SIMD instructions).
+  VPValue *R =
+      Builder.createNaryOp(Instruction::URem, {TC, Step},
+                           DebugLoc::getCompilerGenerated(), "n.mod.vf");
+
+  // There are cases where we *must* run at least one iteration in the remainder
+  // loop. See the cost model for when this can happen. If the step evenly
+  // divides the trip count, we set the remainder to be equal to the step. If
+  // the step does not evenly divide the trip count, no adjustment is necessary
+  // since there will already be scalar iterations. Note that the minimum
+  // iterations check ensures that N >= Step.
+  if (RequiresScalarEpilogue) {
+    assert(!TailByMasking &&
+           "requiring scalar epilogue is not supported with fail folding");
+    VPValue *IsZero = Builder.createICmp(
+        CmpInst::ICMP_EQ, R, Plan.getOrAddLiveIn(ConstantInt::get(TCTy, 0)));
+    R = Builder.createSelect(IsZero, Step, R);
+  }
+
+  VPValue *Res = Builder.createNaryOp(
+      Instruction::Sub, {TC, R}, DebugLoc::getCompilerGenerated(), "n.vec");
+  VectorTC.replaceAllUsesWith(Res);
+}
+
 /// Returns true if \p V is VPWidenLoadRecipe or VPInterleaveRecipe that can be
 /// converted to a narrower recipe. \p V is used by a wide recipe that feeds a
 /// store interleave group at index \p Idx, \p WideMember0 is the recipe feeding
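
To make the RequiresScalarEpilogue branch above concrete: when Step evenly divides the trip count, the remainder is bumped to a full Step so the scalar epilogue is guaranteed to run. Below is a short standalone trace with assumed values (N = 96, Step = 8; neither is taken from the tests).

// Trace of the epilogue-required path (TailByMasking is false). The ternary
// below mirrors the createSelect(IsZero, Step, R) emitted by the transform.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t N = 96, Step = 8; // assumed trip count and VFxUF
  uint64_t R = N % Step;     // n.mod.vf == 0: Step evenly divides N
  R = (R == 0) ? Step : R;   // remainder forced up to a full Step
  uint64_t NVec = N - R;     // n.vec == 88
  assert(NVec == 88 && N - NVec == Step);
  return 0;
}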

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 6 additions & 0 deletions
@@ -256,6 +256,12 @@ struct VPlanTransforms {
                                      unsigned BestUF,
                                      PredicatedScalarEvolution &PSE);
 
+  /// Materialize vector trip count computations to a set of VPInstructions.
+  static void materializeVectorTripCount(VPlan &Plan,
+                                         VPBasicBlock *VectorPHVPBB,
+                                         bool TailByMasking,
+                                         bool RequiresScalarEpilogue);
+
   /// Materialize the backedge-taken count to be computed explicitly using
   /// VPInstructions.
   static void materializeBackedgeTakenCount(VPlan &Plan,

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 4 additions & 16 deletions
@@ -9,19 +9,13 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
-; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 8, [[TMP4]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8)
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
 ; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
 ; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
+; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP1]]
 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -34,7 +28,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
 ; CHECK-NEXT: [[TMP11:%.*]] = lshr <vscale x 8 x i64> [[BROADCAST_SPLAT]], [[TMP10]]
 ; CHECK-NEXT: [[TMP14:%.*]] = trunc <vscale x 8 x i64> [[TMP11]] to <vscale x 8 x i8>
 ; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr [[NEXT_GEP]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 8)
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -92,19 +86,13 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
-; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[WIDE_TRIP_COUNT]], [[TMP4]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
 ; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
 ; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
+; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP1]]
 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -117,7 +105,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
 ; CHECK-NEXT: [[TMP11:%.*]] = lshr <vscale x 8 x i64> [[BROADCAST_SPLAT]], [[TMP10]]
 ; CHECK-NEXT: [[TMP14:%.*]] = trunc <vscale x 8 x i64> [[TMP11]] to <vscale x 8 x i8>
 ; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr [[NEXT_GEP]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]])
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
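
In @clamped_tc_8 above, the round-up computation no longer appears in vector.ph at all: once the vector trip count is modeled in VPlan it can be simplified away when unused, and the induction step reuses the single VF x UF value [[TMP1]] instead of recomputing vscale (previously [[TMP5]]/[[TMP6]]). A worked check with an assumed vscale of 2 follows; the conclusion holds for any vscale >= 1 under the function's vscale_range attribute.

// Worked tail-folding example for clamped_tc_8: N = 8, Step = vscale * 8,
// assuming UF = 1 and vscale = 2 for illustration. Since Step >= 8 for any
// vscale >= 1, a single masked vector iteration covers the whole loop, which
// is why the latch branch is the constant 'br i1 true'.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t N = 8, VScale = 2;
  uint64_t Step = VScale * 8;             // VF * UF, i.e. [[TMP1]]
  uint64_t NRndUp = N + (Step - 1);       // n.rnd.up == 23
  uint64_t NVec = NRndUp - NRndUp % Step; // n.vec == 16
  assert(NVec / Step == 1);               // exactly one vector iteration
  return 0;
}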

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 1 addition & 10 deletions
@@ -702,12 +702,6 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
 ; PRED: [[VECTOR_PH]]:
 ; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
-; PRED-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 257, [[TMP2]]
-; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; PRED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
 ; PRED-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
 ; PRED-NEXT: [[TMP8:%.*]] = sub i64 257, [[TMP7]]
@@ -726,7 +720,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
 ; PRED-NEXT: [[TMP13:%.*]] = or <vscale x 2 x i16> [[BROADCAST_SPLAT]], splat (i16 1)
 ; PRED-NEXT: [[TMP14:%.*]] = uitofp <vscale x 2 x i16> [[TMP13]] to <vscale x 2 x double>
 ; PRED-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP14]], ptr [[NEXT_GEP]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
-; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP5]]
+; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
 ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP10]])
 ; PRED-NEXT: [[TMP16:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
 ; PRED-NEXT: [[TMP17:%.*]] = extractelement <vscale x 2 x i1> [[TMP16]], i32 0
@@ -1242,9 +1236,6 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED-NEXT: [[TMP14:%.*]] = or i1 [[TMP13]], [[TMP12]]
 ; PRED-NEXT: br i1 [[TMP14]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; PRED: [[VECTOR_PH]]:
-; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 7
-; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8
-; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; PRED-NEXT: [[TMP15:%.*]] = sub i64 [[TMP0]], 8
 ; PRED-NEXT: [[TMP16:%.*]] = icmp ugt i64 [[TMP0]], 8
 ; PRED-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i64 [[TMP15]], i64 0
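
The same simplification applies to @multiple_exit_conditions: the now-unused rounded-up trip count drops out of the preheader and [[INDEX_NEXT]] reuses [[TMP1]]. With an assumed vscale of 2 (so Step = vscale * 2 = 4, taking UF = 1; both values are illustrative), the masked loop needs ceil(257 / 4) = 65 iterations.

// Worked example for multiple_exit_conditions under the assumptions above.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t N = 257, VScale = 2;
  uint64_t Step = VScale * 2;             // VF * UF, i.e. [[TMP1]]
  uint64_t NRndUp = N + (Step - 1);       // n.rnd.up == 260
  uint64_t NVec = NRndUp - NRndUp % Step; // n.vec == 260
  assert(NVec / Step == 65 && NVec >= N); // 65 masked vector iterations
  return 0;
}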
