Skip to content

Commit d76cd7b

Browse files
committed
[VPlan] Fix header masks in EVL tail folding
With EVL tail folding, the EVL may not always be VF on the second-to-last iteration. Recipes that have been converted to VP intrinsics via optimizeMaskToEVL account for this, but recipes that are left behind will still use the old header mask, which may end up having a different vector length.

This is effectively the same as llvm#95368, and fixes this by converting header masks from `icmp ule widen-canonical-iv, backedge-taken-count` -> `icmp ult step-vector, evl`. Without it, recipes that fall through optimizeMaskToEVL may use the wrong vector length, e.g. in llvm#150074 and llvm#149981.

We really need to split off optimizeMaskToEVL into VPlanTransforms::optimize and move transformRecipestoEVLRecipes into tryToBuildVPlanWithVPRecipes, so we don't mix up what is needed for correctness and what is needed to optimize away the mask computations. We should still be able to generate a correct, albeit suboptimal, VPlan without running optimizeMaskToEVL. I've added a TODO for this.

Fixes llvm#150197
1 parent 20c52e4 commit d76cd7b

File tree

5 files changed

+44
-25
lines changed

5 files changed

+44
-25
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2235,6 +2235,10 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
22352235
}
22362236

22372237
// Try to optimize header mask recipes away to their EVL variants.
2238+
//
2239+
// TODO: Split this out and move into VPlanTransforms::optimize.
2240+
// transformRecipestoEVLRecipes should be run in tryToBuildVPlanWithVPRecipes
2241+
// beforehand.
22382242
for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
22392243
for (VPUser *U : collectUsersRecursively(HeaderMask)) {
22402244
auto *CurRecipe = cast<VPRecipeBase>(U);
@@ -2265,6 +2269,27 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
22652269
for (VPValue *Op : PossiblyDead)
22662270
recursivelyDeleteDeadRecipes(Op);
22672271
}
2272+
2273+
// Replace header masks with a mask equivalent to predicating by EVL:
2274+
//
2275+
// icmp ule widen-canonical-iv, backedge-taken-count
2276+
// ->
2277+
// icmp ult step-vector, EVL
2278+
Type *EVLType = TypeInfo.inferScalarType(&EVL);
2279+
for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
2280+
if (HeaderMask->users().empty())
2281+
continue;
2282+
VPRecipeBase *EVLR = EVL.getDefiningRecipe();
2283+
VPBuilder Builder(Plan.getVectorPreheader());
2284+
VPValue *StepVector =
2285+
Builder.createNaryOp(VPInstruction::StepVector, {}, EVLType);
2286+
Builder.setInsertPoint(EVLR->getParent(), std::next(EVLR->getIterator()));
2287+
VPValue *EVLMask = Builder.createICmp(
2288+
CmpInst::ICMP_ULT, StepVector,
2289+
Builder.createNaryOp(VPInstruction::Broadcast, {&EVL}));
2290+
HeaderMask->replaceAllUsesWith(EVLMask);
2291+
HeaderMask->getDefiningRecipe()->eraseFromParent();
2292+
}
22682293
}
22692294

22702295
/// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,8 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
173173
.Case<VPInstruction>([&](const VPInstruction *I) {
174174
if (I->getOpcode() == Instruction::PHI)
175175
return VerifyEVLUse(*I, 1);
176+
if (I->getOpcode() == VPInstruction::Broadcast)
177+
return VerifyEVLUse(*I, 0);
176178
switch (I->getOpcode()) {
177179
case Instruction::Add:
178180
break;

llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ define void @test(ptr %p, i64 %a, i8 %b) {
2323
; CHECK-NEXT: [[TMP6:%.*]] = ashr <vscale x 2 x i64> [[TMP5]], splat (i64 52)
2424
; CHECK-NEXT: [[TMP7:%.*]] = trunc <vscale x 2 x i64> [[TMP6]] to <vscale x 2 x i32>
2525
; CHECK-NEXT: [[TMP8:%.*]] = zext <vscale x 2 x i8> [[BROADCAST_SPLAT]] to <vscale x 2 x i32>
26+
; CHECK-NEXT: [[TMP19:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
2627
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[P]], i64 0
2728
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
2829
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
@@ -38,7 +39,9 @@ define void @test(ptr %p, i64 %a, i8 %b) {
3839
; CHECK-NEXT: [[TMP12:%.*]] = mul i32 1, [[TMP11]]
3940
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP12]], i64 0
4041
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT5]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
41-
; CHECK-NEXT: [[TMP13:%.*]] = icmp ule <vscale x 2 x i32> [[VEC_IND]], splat (i32 8)
42+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP11]], i64 0
43+
; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT7]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
44+
; CHECK-NEXT: [[TMP13:%.*]] = icmp ult <vscale x 2 x i32> [[TMP19]], [[BROADCAST_SPLAT8]]
4245
; CHECK-NEXT: [[TMP14:%.*]] = icmp sge <vscale x 2 x i32> [[VEC_IND]], splat (i32 2)
4346
; CHECK-NEXT: [[TMP15:%.*]] = select <vscale x 2 x i1> [[TMP13]], <vscale x 2 x i1> [[TMP14]], <vscale x 2 x i1> zeroinitializer
4447
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP15]], <vscale x 2 x i32> [[TMP7]], <vscale x 2 x i32> [[TMP8]]

llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
383383
; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
384384
; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
385385
; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
386+
; TF-SCALABLE-NEXT: [[TMP16:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
386387
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[B]], i64 0
387388
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
388389
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
@@ -399,7 +400,9 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
399400
; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP11]]
400401
; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
401402
; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
402-
; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IND]], splat (i64 1024)
403+
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP7]], i64 0
404+
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
405+
; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = icmp ult <vscale x 4 x i32> [[TMP16]], [[BROADCAST_SPLAT4]]
403406
; TF-SCALABLE-NEXT: [[TMP10:%.*]] = icmp ugt <vscale x 4 x i64> [[VEC_IND]], splat (i64 10)
404407
; TF-SCALABLE-NEXT: [[TMP9:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP10]], <vscale x 4 x i1> zeroinitializer
405408
; TF-SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i64> @llvm.vp.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> align 8 [[BROADCAST_SPLAT]], <vscale x 4 x i1> [[TMP10]], i32 [[TMP7]])

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll

Lines changed: 9 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -262,25 +262,20 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
262262
; IF-EVL-OUTLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
263263
; IF-EVL-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
264264
; IF-EVL-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
265-
; IF-EVL-OUTLOOP-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
266265
; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
267266
; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
268267
; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
269-
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
270-
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
268+
; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
271269
; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
272270
; IF-EVL-OUTLOOP: vector.body:
273271
; IF-EVL-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
274272
; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
275273
; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ]
276274
; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
277275
; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 4, i1 true)
278-
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[EVL_BASED_IV]], i64 0
279-
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
280-
; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
281-
; IF-EVL-OUTLOOP-NEXT: [[TMP14:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP13]]
282-
; IF-EVL-OUTLOOP-NEXT: [[VEC_IV:%.*]] = add <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP14]]
283-
; IF-EVL-OUTLOOP-NEXT: [[TMP15:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT2]]
276+
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP11]], i64 0
277+
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
278+
; IF-EVL-OUTLOOP-NEXT: [[TMP15:%.*]] = icmp ult <vscale x 4 x i32> [[TMP12]], [[BROADCAST_SPLAT]]
284279
; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]]
285280
; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
286281
; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
@@ -790,35 +785,27 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
790785
; IF-EVL-OUTLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
791786
; IF-EVL-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
792787
; IF-EVL-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
793-
; IF-EVL-OUTLOOP-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
794788
; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
795789
; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
796790
; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
797-
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
798-
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
799-
; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
800-
; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = mul <vscale x 4 x i64> [[TMP10]], splat (i64 1)
801-
; IF-EVL-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP11]]
802791
; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
803-
; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = mul <vscale x 4 x i32> [[TMP12]], splat (i32 1)
792+
; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
793+
; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = mul <vscale x 4 x i32> [[TMP10]], splat (i32 1)
804794
; IF-EVL-OUTLOOP-NEXT: [[INDUCTION1:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP13]]
805795
; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
806796
; IF-EVL-OUTLOOP: vector.body:
807797
; IF-EVL-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
808798
; IF-EVL-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
809-
; IF-EVL-OUTLOOP-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
810799
; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ]
811800
; IF-EVL-OUTLOOP-NEXT: [[VEC_IND2:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ]
812801
; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[IV]]
813802
; IF-EVL-OUTLOOP-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
814803
; IF-EVL-OUTLOOP-NEXT: [[TMP15:%.*]] = mul i32 1, [[TMP14]]
815804
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP15]], i64 0
816805
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
817-
; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = zext i32 [[TMP14]] to i64
818-
; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = mul i64 1, [[TMP16]]
819-
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP17]], i64 0
820-
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
821-
; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
806+
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP14]], i64 0
807+
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
808+
; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp ult <vscale x 4 x i32> [[TMP12]], [[BROADCAST_SPLAT2]]
822809
; IF-EVL-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
823810
; IF-EVL-OUTLOOP-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 0
824811
; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
@@ -830,7 +817,6 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
830817
; IF-EVL-OUTLOOP-NEXT: [[TMP25:%.*]] = zext i32 [[TMP14]] to i64
831818
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP25]], [[IV]]
832819
; IF-EVL-OUTLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
833-
; IF-EVL-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT6]]
834820
; IF-EVL-OUTLOOP-NEXT: [[VEC_IND_NEXT7]] = add <vscale x 4 x i32> [[VEC_IND2]], [[BROADCAST_SPLAT4]]
835821
; IF-EVL-OUTLOOP-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
836822
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]

0 commit comments

Comments
 (0)