Skip to content

Commit be3ada7

Browse files
committed
[VPlan] Avoid copying over nsw on certain inductions
As the test case shows, the induction could have a negative start value and a positive step value, which would prevent us from copying over nsw from the original scalar loop. Fixes #168902.
1 parent 8ac24a0 commit be3ada7

26 files changed

+164
-154
lines changed

llvm/include/llvm/Analysis/IVDescriptors.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,7 @@ class InductionDescriptor {
398398
InductionDescriptor() = default;
399399

400400
Value *getStartValue() const { return StartValue; }
401+
ConstantInt *getConstIntStartValue() const;
401402
InductionKind getKind() const { return IK; }
402403
const SCEV *getStep() const { return Step; }
403404
BinaryOperator *getInductionBinOp() const { return InductionBinOp; }

llvm/lib/Analysis/IVDescriptors.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1395,6 +1395,10 @@ InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
13951395
llvm::append_range(RedundantCasts, *Casts);
13961396
}
13971397

1398+
ConstantInt *InductionDescriptor::getConstIntStartValue() const {
1399+
return dyn_cast<ConstantInt>(StartValue.getValPtr());
1400+
}
1401+
13981402
ConstantInt *InductionDescriptor::getConstIntStepValue() const {
13991403
if (isa<SCEVConstant>(Step))
14001404
return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7640,9 +7640,8 @@ createWidenInductionRecipes(VPInstruction *PhiR,
76407640
assert(Plan.getLiveIn(IndDesc.getStartValue()) == Start &&
76417641
"Start VPValue must match IndDesc's start value");
76427642

7643-
// It is always safe to copy over the NoWrap and FastMath flags. In
7644-
// particular, when folding tail by masking, the masked-off lanes are never
7645-
// used, so it is safe.
7643+
// It is always safe to copy over nuw and FastMath flags. In particular, when
7644+
// folding tail by masking, the masked-off lanes are neverused, so it is safe.
76467645
VPIRFlags Flags = vputils::getFlagsFromIndDesc(IndDesc);
76477646
VPValue *Step =
76487647
vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep());
@@ -7711,9 +7710,9 @@ VPRecipeBuilder::tryToOptimizeInductionTruncate(VPInstruction *VPI,
77117710
VPValue *Start = WidenIV->getStartValue();
77127711
const InductionDescriptor &IndDesc = WidenIV->getInductionDescriptor();
77137712

7714-
// It is always safe to copy over the NoWrap and FastMath flags. In
7715-
// particular, when folding tail by masking, the masked-off lanes are never
7716-
// used, so it is safe.
7713+
// It is always safe to copy over nuw and FastMath flags. In particular, when
7714+
// folding tail by masking, the masked-off lanes are never used, so it is
7715+
// safe.
77177716
VPIRFlags Flags = vputils::getFlagsFromIndDesc(IndDesc);
77187717
VPValue *Step =
77197718
vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep());

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
8181
VPValue *Start = Plan.getOrAddLiveIn(II->getStartValue());
8282
VPValue *Step =
8383
vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep());
84-
// It is always safe to copy over the NoWrap and FastMath flags. In
84+
// It is always safe to copy over nuw and FastMath flags. In
8585
// particular, when folding tail by masking, the masked-off lanes are
8686
// never used, so it is safe.
8787
VPIRFlags Flags = vputils::getFlagsFromIndDesc(*II);

llvm/lib/Transforms/Vectorize/VPlanUtils.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,9 +87,15 @@ inline VPIRFlags getFlagsFromIndDesc(const InductionDescriptor &ID) {
8787
return ID.getInductionBinOp()->getFastMathFlags();
8888

8989
if (auto *OBO = dyn_cast_if_present<OverflowingBinaryOperator>(
90-
ID.getInductionBinOp()))
91-
return VPIRFlags::WrapFlagsTy(OBO->hasNoUnsignedWrap(),
92-
OBO->hasNoSignedWrap());
90+
ID.getInductionBinOp())) {
91+
// We must drop nsw if the signs of the induction start and the induction
92+
// step differ, as this could sign-wrap over iterations.
93+
bool HasNSW = OBO->hasNoSignedWrap() && ID.getConstIntStartValue() &&
94+
ID.getConstIntStepValue() &&
95+
!(ID.getConstIntStartValue()->isNegative() ^
96+
ID.getConstIntStepValue()->isNegative());
97+
return VPIRFlags::WrapFlagsTy(OBO->hasNoUnsignedWrap(), HasNSW);
98+
}
9399

94100
assert(ID.getKind() == InductionDescriptor::IK_IntInduction &&
95101
"Expected int induction");

llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) {
200200
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START]], [[N_VEC]]
201201
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[START]], i64 0
202202
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
203-
; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i64> [[DOTSPLAT]], <i64 0, i64 1>
203+
; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw <2 x i64> [[DOTSPLAT]], <i64 0, i64 1>
204204
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
205205
; CHECK: vector.body:
206206
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -212,7 +212,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) {
212212
; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP2]], align 4
213213
; CHECK-NEXT: store <2 x i64> [[STEP_ADD]], ptr [[TMP4]], align 4
214214
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
215-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[STEP_ADD]], splat (i64 2)
215+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw <2 x i64> [[STEP_ADD]], splat (i64 2)
216216
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
217217
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
218218
; CHECK: middle.block:
@@ -230,7 +230,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) {
230230
; CHECK-NEXT: [[IND_END4:%.*]] = add i64 [[START]], [[N_VEC3]]
231231
; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <2 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
232232
; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT8]], <2 x i64> poison, <2 x i32> zeroinitializer
233-
; CHECK-NEXT: [[INDUCTION10:%.*]] = add nuw nsw <2 x i64> [[DOTSPLAT9]], <i64 0, i64 1>
233+
; CHECK-NEXT: [[INDUCTION10:%.*]] = add nuw <2 x i64> [[DOTSPLAT9]], <i64 0, i64 1>
234234
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
235235
; CHECK: vec.epilog.vector.body:
236236
; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
@@ -239,7 +239,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) {
239239
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX13]]
240240
; CHECK-NEXT: store <2 x i64> [[VEC_IND11]], ptr [[TMP7]], align 4
241241
; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX7]], 2
242-
; CHECK-NEXT: [[VEC_IND_NEXT12]] = add nuw nsw <2 x i64> [[VEC_IND11]], splat (i64 2)
242+
; CHECK-NEXT: [[VEC_IND_NEXT12]] = add nuw <2 x i64> [[VEC_IND11]], splat (i64 2)
243243
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC3]]
244244
; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
245245
; CHECK: vec.epilog.middle.block:

llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
2727
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
2828
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[IDX]], i64 0
2929
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
30-
; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <vscale x 2 x i32> [[TMP8]], splat (i32 1)
31-
; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <vscale x 2 x i32> [[DOTSPLAT]], [[TMP9]]
30+
; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 2 x i32> [[TMP8]], splat (i32 1)
31+
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> [[DOTSPLAT]], [[TMP9]]
3232
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP5]], i64 0
3333
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
3434
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -42,7 +42,7 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
4242
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i64, ptr [[B:%.*]], i32 [[OFFSET_IDX]]
4343
; CHECK-NEXT: store <vscale x 2 x double> [[WIDE_LOAD]], ptr [[TMP18]], align 8
4444
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]]
45-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <vscale x 2 x i32> [[VEC_IND]], [[DOTSPLAT2]]
45+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[DOTSPLAT2]]
4646
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4747
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4848
; CHECK: middle.block:

llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,7 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n
391391
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[REVERSE2]], <vscale x 4 x i32> [[REVERSE3]])
392392
; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP19]], align 4
393393
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
394-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
394+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
395395
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
396396
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
397397
; CHECK: middle.block:
@@ -595,7 +595,7 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no
595595
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> [[TMP4]], <vscale x 4 x ptr> align 8 [[TMP5]], <vscale x 4 x i1> splat (i1 true))
596596
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> [[TMP7]], <vscale x 4 x ptr> align 8 [[TMP6]], <vscale x 4 x i1> splat (i1 true))
597597
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
598-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
598+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
599599
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
600600
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
601601
; CHECK: middle.block:
@@ -1590,7 +1590,7 @@ define void @interleave_deinterleave_reverse(ptr noalias nocapture readonly %A,
15901590
; CHECK-NEXT: [[INTERLEAVED_VEC11:%.*]] = call <vscale x 16 x i32> @llvm.vector.interleave4.nxv16i32(<vscale x 4 x i32> [[REVERSE6]], <vscale x 4 x i32> [[REVERSE7]], <vscale x 4 x i32> [[REVERSE8]], <vscale x 4 x i32> [[REVERSE9]])
15911591
; CHECK-NEXT: store <vscale x 16 x i32> [[INTERLEAVED_VEC11]], ptr [[TMP26]], align 4
15921592
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
1593-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
1593+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
15941594
; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
15951595
; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]]
15961596
; CHECK: middle.block:

llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -452,9 +452,9 @@ define void @test_stride_noninvar3_4i32(ptr readonly %data, ptr noalias nocaptur
452452
; CHECK-NEXT: [[IND_END:%.*]] = add i32 3, [[TMP0]]
453453
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0
454454
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
455-
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
456-
; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i32> splat (i32 3), [[TMP3]]
457-
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[X]], 4
455+
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
456+
; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw <4 x i32> splat (i32 3), [[TMP3]]
457+
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i32 [[X]], 4
458458
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0
459459
; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
460460
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -471,7 +471,7 @@ define void @test_stride_noninvar3_4i32(ptr readonly %data, ptr noalias nocaptur
471471
; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP8]], align 4
472472
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
473473
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4)
474-
; CHECK-NEXT: [[VEC_IND_NEXT5]] = add nuw nsw <4 x i32> [[VEC_IND4]], [[DOTSPLAT3]]
474+
; CHECK-NEXT: [[VEC_IND_NEXT5]] = add nuw <4 x i32> [[VEC_IND4]], [[DOTSPLAT3]]
475475
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
476476
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
477477
; CHECK: middle.block:

llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -594,16 +594,16 @@ define i32 @load_factor_4_reverse(i64 %n, ptr noalias %a) {
594594
; IF-EVL-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
595595
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[N]], i64 0
596596
; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
597-
; IF-EVL-NEXT: [[TMP3:%.*]] = mul nsw <vscale x 4 x i64> [[TMP4]], splat (i64 -1)
598-
; IF-EVL-NEXT: [[INDUCTION:%.*]] = add nsw <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP3]]
597+
; IF-EVL-NEXT: [[TMP3:%.*]] = mul <vscale x 4 x i64> [[TMP4]], splat (i64 -1)
598+
; IF-EVL-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP3]]
599599
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
600600
; IF-EVL: vector.body:
601601
; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
602602
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
603603
; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP1]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
604604
; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
605605
; IF-EVL-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
606-
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nsw i64 -1, [[TMP7]]
606+
; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 -1, [[TMP7]]
607607
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
608608
; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
609609
; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x i32], ptr [[A:%.*]], <vscale x 4 x i64> [[VEC_IND]], i32 0
@@ -619,7 +619,7 @@ define i32 @load_factor_4_reverse(i64 %n, ptr noalias %a) {
619619
; IF-EVL-NEXT: [[TMP15:%.*]] = add <vscale x 4 x i32> [[TMP13]], [[WIDE_MASKED_GATHER5]]
620620
; IF-EVL-NEXT: [[TMP16]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP15]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP6]])
621621
; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]]
622-
; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add nsw <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]]
622+
; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]]
623623
; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
624624
; IF-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
625625
; IF-EVL: middle.block:
@@ -646,9 +646,9 @@ define i32 @load_factor_4_reverse(i64 %n, ptr noalias %a) {
646646
; NO-VP-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
647647
; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[N]], i64 0
648648
; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
649-
; NO-VP-NEXT: [[TMP8:%.*]] = mul nsw <vscale x 4 x i64> [[TMP7]], splat (i64 -1)
650-
; NO-VP-NEXT: [[INDUCTION:%.*]] = add nsw <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP8]]
651-
; NO-VP-NEXT: [[TMP9:%.*]] = mul nsw i64 -1, [[TMP5]]
649+
; NO-VP-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i64> [[TMP7]], splat (i64 -1)
650+
; NO-VP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP8]]
651+
; NO-VP-NEXT: [[TMP9:%.*]] = mul i64 -1, [[TMP5]]
652652
; NO-VP-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
653653
; NO-VP-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
654654
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -668,7 +668,7 @@ define i32 @load_factor_4_reverse(i64 %n, ptr noalias %a) {
668668
; NO-VP-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison)
669669
; NO-VP-NEXT: [[TMP16]] = add <vscale x 4 x i32> [[TMP14]], [[WIDE_MASKED_GATHER5]]
670670
; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
671-
; NO-VP-NEXT: [[VEC_IND_NEXT]] = add nsw <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]]
671+
; NO-VP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]]
672672
; NO-VP-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
673673
; NO-VP-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
674674
; NO-VP: middle.block:

0 commit comments

Comments
 (0)