diff --git a/flang/test/Integration/unroll-loops.f90 b/flang/test/Integration/unroll-loops.f90 index 87ab9efeb703b..2c4a3495eb0b7 100644 --- a/flang/test/Integration/unroll-loops.f90 +++ b/flang/test/Integration/unroll-loops.f90 @@ -25,7 +25,7 @@ subroutine unroll(a) ! NO-UNROLL-NEXT: %[[GEP:.*]] = getelementptr i64, ptr %[[ARG0]], i64 %[[IND]] ! NO-UNROLL-NEXT: store <2 x i64> %[[VIND]], ptr %[[GEP]] ! NO-UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %{{.*}}, 2 - ! NO-UNROLL-NEXT: %[[NVIND]] = add <2 x i64> %[[VIND]], splat (i64 2) + ! NO-UNROLL-NEXT: %[[NVIND]] = add nuw nsw <2 x i64> %[[VIND]], splat (i64 2) ! ! UNROLL-NEXT: %[[VIND1:.*]] = add <2 x i64> %[[VIND]], splat (i64 2) ! UNROLL-NEXT: %[[GEP0:.*]] = getelementptr i64, ptr %[[ARG0]], i64 %[[IND]] diff --git a/flang/test/Lower/HLFIR/unroll-loops.fir b/flang/test/Lower/HLFIR/unroll-loops.fir index 89e8ce82d6f3f..1ccb6b1bd0315 100644 --- a/flang/test/Lower/HLFIR/unroll-loops.fir +++ b/flang/test/Lower/HLFIR/unroll-loops.fir @@ -27,7 +27,7 @@ func.func @unroll(%arg0: !fir.ref> {fir.bindc_name = "a // NO-UNROLL-NEXT: %[[GEP:.*]] = getelementptr i64, ptr %[[ARG0]], i64 %[[IND]] // NO-UNROLL-NEXT: store <2 x i64> %[[VIND]], ptr %[[GEP]] // NO-UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %{{.*}}, 2 - // NO-UNROLL-NEXT: %[[NVIND]] = add <2 x i64> %[[VIND]], splat (i64 2) + // NO-UNROLL-NEXT: %[[NVIND]] = add nuw nsw <2 x i64> %[[VIND]], splat (i64 2) // UNROLL-NEXT: %[[VIND1:.*]] = add <2 x i64> %[[VIND]], splat (i64 2) // UNROLL-NEXT: %[[GEP0:.*]] = getelementptr i64, ptr %[[ARG0]], i64 %[[IND]] diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 45b5570261416..540a99f9ea70f 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7575,11 +7575,15 @@ createWidenInductionRecipes(VPInstruction *PhiR, assert(Plan.getLiveIn(IndDesc.getStartValue()) == Start && "Start VPValue must match IndDesc's start value"); + // It is always safe to copy over the NoWrap and FastMath flags. In + // particular, when folding tail by masking, the masked-off lanes are never + // used, so it is safe. + VPIRFlags Flags = vputils::getFlagsFromIndDesc(IndDesc); VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep()); PHINode *Phi = cast(PhiR->getUnderlyingInstr()); return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(), - IndDesc, PhiR->getDebugLoc()); + IndDesc, Flags, PhiR->getDebugLoc()); } VPHeaderPHIRecipe * @@ -7633,10 +7637,15 @@ VPRecipeBuilder::tryToOptimizeInductionTruncate(VPInstruction *VPI, PHINode *Phi = WidenIV->getPHINode(); VPValue *Start = WidenIV->getStartValue(); const InductionDescriptor &IndDesc = WidenIV->getInductionDescriptor(); + + // It is always safe to copy over the NoWrap and FastMath flags. In + // particular, when folding tail by masking, the masked-off lanes are never + // used, so it is safe. + VPIRFlags Flags = vputils::getFlagsFromIndDesc(IndDesc); VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep()); - return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(), - IndDesc, I, VPI->getDebugLoc()); + return new VPWidenIntOrFpInductionRecipe( + Phi, Start, Step, &Plan.getVF(), IndDesc, I, Flags, VPI->getDebugLoc()); } VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(VPInstruction *VPI, diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 5851b3ab7978c..58c2aecaa6bff 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2119,7 +2119,8 @@ class VPWidenInductionRecipe : public VPHeaderPHIRecipe { /// A recipe for handling phi nodes of integer and floating-point inductions, /// producing their vector values. This is an abstract recipe and must be /// converted to concrete recipes before executing. -class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { +class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe, + public VPIRFlags { TruncInst *Trunc; // If this recipe is unrolled it will have 2 additional operands. @@ -2128,19 +2129,20 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { public: VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, - DebugLoc DL) + const VPIRFlags &Flags, DebugLoc DL) : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start, Step, IndDesc, DL), - Trunc(nullptr) { + VPIRFlags(Flags), Trunc(nullptr) { addOperand(VF); } VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, - TruncInst *Trunc, DebugLoc DL) + TruncInst *Trunc, const VPIRFlags &Flags, + DebugLoc DL) : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start, Step, IndDesc, DL), - Trunc(Trunc) { + VPIRFlags(Flags), Trunc(Trunc) { addOperand(VF); SmallVector> Metadata; (void)Metadata; @@ -2154,7 +2156,7 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { VPWidenIntOrFpInductionRecipe *clone() override { return new VPWidenIntOrFpInductionRecipe( getPHINode(), getStartValue(), getStepValue(), getVFValue(), - getInductionDescriptor(), Trunc, getDebugLoc()); + getInductionDescriptor(), Trunc, *this, getDebugLoc()); } VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 634df51a12965..a4f5e296945af 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -80,8 +80,13 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes( VPValue *Start = Plan.getOrAddLiveIn(II->getStartValue()); VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep()); + // It is always safe to copy over the NoWrap and FastMath flags. In + // particular, when folding tail by masking, the masked-off lanes are + // never used, so it is safe. + VPIRFlags Flags = vputils::getFlagsFromIndDesc(*II); NewRecipe = new VPWidenIntOrFpInductionRecipe( - Phi, Start, Step, &Plan.getVF(), *II, Ingredient.getDebugLoc()); + Phi, Start, Step, &Plan.getVF(), *II, Flags, + Ingredient.getDebugLoc()); } } else { assert(isa(&Ingredient) && @@ -546,6 +551,8 @@ static void removeRedundantCanonicalIVs(VPlan &Plan) { // only. if (!vputils::onlyScalarValuesUsed(WidenOriginalIV) || vputils::onlyFirstLaneUsed(WidenNewIV)) { + // Drop poison-generating flags when performing replacement. + WidenOriginalIV->dropPoisonGeneratingFlags(); WidenNewIV->replaceAllUsesWith(WidenOriginalIV); WidenNewIV->eraseFromParent(); return; @@ -3227,16 +3234,13 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, const InductionDescriptor &ID = WidenIVR->getInductionDescriptor(); Instruction::BinaryOps AddOp; Instruction::BinaryOps MulOp; - // FIXME: The newly created binary instructions should contain nsw/nuw - // flags, which can be found from the original scalar operations. - VPIRFlags Flags; + VPIRFlags Flags = *WidenIVR; if (ID.getKind() == InductionDescriptor::IK_IntInduction) { AddOp = Instruction::Add; MulOp = Instruction::Mul; } else { AddOp = ID.getInductionOpcode(); MulOp = Instruction::FMul; - Flags = ID.getInductionBinOp()->getFastMathFlags(); } // If the phi is truncated, truncate the start and step values. diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h index c21a0e70c1392..c4d53c5e508db 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h @@ -71,6 +71,19 @@ std::optional getRecipesForUncountableExit(VPlan &Plan, SmallVectorImpl &Recipes, SmallVectorImpl &GEPs); + +/// Extracts and returns NoWrap and FastMath flags from the induction binop in +/// \p ID. +inline VPIRFlags getFlagsFromIndDesc(const InductionDescriptor &ID) { + if (ID.getKind() == InductionDescriptor::IK_FpInduction) + return ID.getInductionBinOp()->getFastMathFlags(); + + if (auto *OBO = dyn_cast_if_present( + ID.getInductionBinOp())) + return VPIRFlags::WrapFlagsTy(OBO->hasNoUnsignedWrap(), + OBO->hasNoSignedWrap()); + return {}; +} } // namespace vputils //===----------------------------------------------------------------------===// diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll index 0415b01d78b46..ac8095ae5c3e7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll @@ -14,8 +14,8 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64() -; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP8]], splat (i64 1) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; CHECK-NEXT: [[TMP3:%.*]] = mul [[TMP8]], splat (i64 1) +; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP3]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP1]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -76,8 +76,8 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64() -; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP8]], splat (i64 1) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; CHECK-NEXT: [[TMP3:%.*]] = mul [[TMP8]], splat (i64 1) +; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP3]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP1]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index f16351720b20f..07ad96e12ebd5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -660,16 +660,16 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; COMMON-NEXT: store i8 6, ptr [[TMP6]], align 1 ; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE12]] ; COMMON: [[PRED_STORE_CONTINUE12]]: -; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[EXIT:.*]] +; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]] ; COMMON: [[PRED_STORE_IF13]]: ; COMMON-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 7 ; COMMON-NEXT: store i8 7, ptr [[TMP7]], align 1 -; COMMON-NEXT: br label %[[EXIT]] +; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE14]] +; COMMON: [[PRED_STORE_CONTINUE14]]: +; COMMON-NEXT: br label %[[MIDDLE_BLOCK:.*]] +; COMMON: [[MIDDLE_BLOCK]]: +; COMMON-NEXT: br label %[[EXIT:.*]] ; COMMON: [[EXIT]]: -; COMMON-NEXT: br label %[[SCALAR_PH:.*]] -; COMMON: [[SCALAR_PH]]: -; COMMON-NEXT: br label %[[EXIT1:.*]] -; COMMON: [[EXIT1]]: ; COMMON-NEXT: ret void ; entry: @@ -1110,7 +1110,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) { ; DEFAULT-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 ; DEFAULT-NEXT: store i32 [[TMP2]], ptr [[DST]], align 4 ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD]], splat (i64 4) ; DEFAULT-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; DEFAULT-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll index a49f089bd2085..2180f18750bf2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll @@ -35,7 +35,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[TMP10]] = select <16 x i1> [[TMP17]], <16 x i8> [[VEC_IND]], <16 x i8> [[VEC_PHI]] ; CHECK-NEXT: [[TMP11]] = select <16 x i1> [[TMP23]], <16 x i8> [[STEP_ADD]], <16 x i8> [[VEC_PHI2]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i8> [[STEP_ADD]], splat (i8 16) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <16 x i8> [[STEP_ADD]], splat (i8 16) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -48,7 +48,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: ; CHECK-NEXT: [[IND_END:%.*]] = trunc i32 [[N_VEC]] to i8 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_MOD_VF]], 8 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: [[VEC_EPILOG_PH]]: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP3]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] @@ -62,11 +62,11 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i8> [[DOTSPLATINSERT]], <8 x i8> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <8 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <8 x i8> [[DOTSPLATINSERT10]], <8 x i8> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i8> [[DOTSPLAT11]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <8 x i8> [[DOTSPLAT11]], ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX6:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <8 x i8> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <8 x i8> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT13:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi <8 x i8> [ [[DOTSPLAT]], %[[VEC_EPILOG_PH]] ], [ [[TMP20:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[IV:%.*]] = trunc i32 [[INDEX6]] to i8 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[IV]] @@ -74,9 +74,9 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD12]], splat (i8 3) ; CHECK-NEXT: [[TMP20]] = select <8 x i1> [[TMP19]], <8 x i8> [[VEC_IND7]], <8 x i8> [[VEC_PHI9]] ; CHECK-NEXT: [[INDEX_NEXT13]] = add nuw i32 [[INDEX6]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <8 x i8> [[VEC_IND7]], splat (i8 8) +; CHECK-NEXT: [[VEC_IND_NEXT13]] = add nuw nsw <8 x i8> [[VEC_IND7]], splat (i8 8) ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT13]], [[N_VEC5]] -; CHECK-NEXT: br i1 [[TMP21]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP21]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP22:%.*]] = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> [[TMP20]]) ; CHECK-NEXT: [[RDX_SELECT_CMP14:%.*]] = icmp ne i8 [[TMP22]], -128 @@ -96,7 +96,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[SEL]] = select i1 [[C]], i8 [[IV1]], i8 [[RDX]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i8 [[IV1]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i8 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi i8 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_SELECT15]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i8 [[SEL_LCSSA]] @@ -158,7 +158,7 @@ define i32 @select_icmp_var_start_iv_trunc(i32 %N, i32 %start) #0 { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP3]], <4 x i32> [[TMP4]]) ; CHECK-NEXT: [[RDX_MINMAX5:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[TMP5]]) @@ -170,7 +170,7 @@ define i32 @select_icmp_var_start_iv_trunc(i32 %N, i32 %start) #0 { ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF7:![0-9]+]] ; CHECK: [[VEC_EPILOG_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[FR]], %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] @@ -197,7 +197,7 @@ define i32 @select_icmp_var_start_iv_trunc(i32 %N, i32 %start) #0 { ; CHECK-NEXT: [[INDEX_NEXT17]] = add nuw i64 [[INDEX11]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT16]] = add <4 x i32> [[VEC_IND15]], splat (i32 4) ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT17]], [[N_VEC8]] -; CHECK-NEXT: br i1 [[TMP15]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP15]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP14]]) ; CHECK-NEXT: [[RDX_SELECT_CMP18:%.*]] = icmp ne i32 [[TMP16]], -2147483648 @@ -216,7 +216,7 @@ define i32 @select_icmp_var_start_iv_trunc(i32 %N, i32 %start) #0 { ; CHECK-NEXT: [[RED_NEXT]] = select i1 [[C]], i32 [[IV_TRUNC]], i32 [[RED]] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N_EXT]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_SELECT19]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[RED_NEXT_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll index d23e3c29b59e5..3010a9d75d039 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll @@ -46,7 +46,7 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) { ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[IND_END4:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000 -; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000 @@ -128,7 +128,7 @@ define void @test_widen_induction(ptr %A, i64 %N) { ; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP1]], align 4 ; CHECK-NEXT: store <2 x i64> [[STEP_ADD]], ptr [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[STEP_ADD]], splat (i64 2) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: middle.block: @@ -136,14 +136,14 @@ define void @test_widen_induction(ptr %A, i64 %N) { ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 2 ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[VEC_EPILOG_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i64> [[DOTSPLAT]], ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] @@ -151,7 +151,7 @@ define void @test_widen_induction(ptr %A, i64 %N) { ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX5]] ; CHECK-NEXT: store <2 x i64> [[VEC_IND6]], ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX5]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT7]] = add <2 x i64> [[VEC_IND6]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT7]] = add nuw nsw <2 x i64> [[VEC_IND6]], splat (i64 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP8]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: vec.epilog.middle.block: @@ -200,7 +200,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START]], [[N_VEC]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[START]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i64> [[DOTSPLAT]], ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -212,7 +212,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP2]], align 4 ; CHECK-NEXT: store <2 x i64> [[STEP_ADD]], ptr [[TMP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[STEP_ADD]], splat (i64 2) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: middle.block: @@ -221,7 +221,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[IND_END5:%.*]] = add i64 [[START]], [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] @@ -230,7 +230,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK-NEXT: [[IND_END4:%.*]] = add i64 [[START]], [[N_VEC3]] ; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <2 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT8]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION10:%.*]] = add <2 x i64> [[DOTSPLAT9]], +; CHECK-NEXT: [[INDUCTION10:%.*]] = add nuw nsw <2 x i64> [[DOTSPLAT9]], ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] @@ -239,7 +239,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX13]] ; CHECK-NEXT: store <2 x i64> [[VEC_IND11]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX7]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT12]] = add <2 x i64> [[VEC_IND11]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT12]] = add nuw nsw <2 x i64> [[VEC_IND11]], splat (i64 2) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: vec.epilog.middle.block: @@ -296,7 +296,7 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) { ; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr [[TMP1]], align 4 ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[STEP_ADD]], splat (i64 2) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[IND_END4]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: middle.block: @@ -304,14 +304,14 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) { ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 2 ; CHECK-NEXT: [[IND_END:%.*]] = sub i64 [[N]], [[N_MOD_VF2]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[VEC_EPILOG_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i64> [[DOTSPLAT]], ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] @@ -320,7 +320,7 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) { ; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i64> [[VEC_IND8]], splat (i64 10) ; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT9]] = add <2 x i64> [[VEC_IND8]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT9]] = add nuw nsw <2 x i64> [[VEC_IND8]], splat (i64 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[IND_END]] ; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: vec.epilog.middle.block: @@ -410,19 +410,19 @@ define void @test_widen_truncated_induction(ptr %A) { ; CHECK-NEXT: store <2 x i8> [[VEC_IND]], ptr [[TMP1]], align 1 ; CHECK-NEXT: store <2 x i8> [[STEP_ADD]], ptr [[TMP3]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], splat (i8 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i8> [[STEP_ADD]], splat (i8 2) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[VEC_EPILOG_RESUME_VAL]] to i8 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i8> [[DOTSPLAT]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i8> [[DOTSPLAT]], ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] @@ -430,7 +430,7 @@ define void @test_widen_truncated_induction(ptr %A) { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX2]] ; CHECK-NEXT: store <2 x i8> [[VEC_IND3]], ptr [[TMP7]], align 1 ; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX2]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i8> [[VEC_IND3]], splat (i8 2) +; CHECK-NEXT: [[VEC_IND_NEXT4]] = add nuw nsw <2 x i8> [[VEC_IND3]], splat (i8 2) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT5]], 10000 ; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: vec.epilog.middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll index f50d0834c5dc8..75f256085a17a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll @@ -198,7 +198,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT: [[VECTOR_BODY]]: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE35:.*]] ] ; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <16 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE35]] ] -; DEFAULT-NEXT: [[VEC_IND1:%.*]] = phi <16 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], %[[PRED_STORE_CONTINUE35]] ] +; DEFAULT-NEXT: [[VEC_IND1:%.*]] = phi <16 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT36:%.*]], %[[PRED_STORE_CONTINUE35]] ] ; DEFAULT-NEXT: [[TMP0:%.*]] = icmp ule <16 x i8> [[VEC_IND]], splat (i8 14) ; DEFAULT-NEXT: [[TMP1:%.*]] = mul <16 x i8> [[BROADCAST_SPLAT]], [[VEC_IND1]] ; DEFAULT-NEXT: [[TMP2:%.*]] = lshr <16 x i8> [[VEC_IND1]], splat (i8 1) @@ -353,7 +353,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT: [[PRED_STORE_CONTINUE35]]: ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <16 x i8> [[VEC_IND]], splat (i8 16) -; DEFAULT-NEXT: [[VEC_IND_NEXT2]] = add <16 x i8> [[VEC_IND1]], splat (i8 16) +; DEFAULT-NEXT: [[VEC_IND_NEXT36]] = add nuw nsw <16 x i8> [[VEC_IND1]], splat (i8 16) ; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] @@ -454,8 +454,8 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i8 [[C]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; DEFAULT-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv16i8() -; DEFAULT-NEXT: [[TMP11:%.*]] = mul [[TMP10]], splat (i8 1) -; DEFAULT-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]] +; DEFAULT-NEXT: [[TMP11:%.*]] = mul nuw nsw [[TMP10]], splat (i8 1) +; DEFAULT-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP11]] ; DEFAULT-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP1]] to i8 ; DEFAULT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP12]], i64 0 ; DEFAULT-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer @@ -477,7 +477,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; DEFAULT-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; DEFAULT-NEXT: [[TMP24:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; DEFAULT-NEXT: [[TMP23:%.*]] = xor i1 [[TMP24]], true -; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; DEFAULT-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] @@ -504,8 +504,8 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i8 [[C]], i64 0 ; OPTSIZE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; OPTSIZE-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv16i8() -; OPTSIZE-NEXT: [[TMP11:%.*]] = mul [[TMP10]], splat (i8 1) -; OPTSIZE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]] +; OPTSIZE-NEXT: [[TMP11:%.*]] = mul nuw nsw [[TMP10]], splat (i8 1) +; OPTSIZE-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP11]] ; OPTSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP1]] to i8 ; OPTSIZE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP12]], i64 0 ; OPTSIZE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer @@ -527,7 +527,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; OPTSIZE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; OPTSIZE-NEXT: [[TMP24:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; OPTSIZE-NEXT: [[TMP23:%.*]] = xor i1 [[TMP24]], true -; OPTSIZE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; OPTSIZE-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; OPTSIZE-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; OPTSIZE: [[MIDDLE_BLOCK]]: ; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] @@ -554,8 +554,8 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i8 [[C]], i64 0 ; MINSIZE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; MINSIZE-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv16i8() -; MINSIZE-NEXT: [[TMP11:%.*]] = mul [[TMP10]], splat (i8 1) -; MINSIZE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]] +; MINSIZE-NEXT: [[TMP11:%.*]] = mul nuw nsw [[TMP10]], splat (i8 1) +; MINSIZE-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP11]] ; MINSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP1]] to i8 ; MINSIZE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP12]], i64 0 ; MINSIZE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer @@ -577,7 +577,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; MINSIZE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; MINSIZE-NEXT: [[TMP24:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; MINSIZE-NEXT: [[TMP23:%.*]] = xor i1 [[TMP24]], true -; MINSIZE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; MINSIZE-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; MINSIZE-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; MINSIZE: [[MIDDLE_BLOCK]]: ; MINSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll index 5b61fba4ae994..5f8d7e7d24cc4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll @@ -19,8 +19,8 @@ define void @foo() { ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul [[TMP4]], splat (i64 1) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP5]] +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw nsw [[TMP4]], splat (i64 1) +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP5]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -44,7 +44,7 @@ define void @foo() { ; CHECK-NEXT: [[TMP14:%.*]] = phi [ [[TMP10]], [[INNER_LOOP1]] ] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[TMP14]], align 4 [[TMP6]], splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll index 6d0777e42ab0e..2edbec4681ab0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll @@ -51,7 +51,7 @@ define void @foo_i32(i32 %n) { ; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_LATCH]], label %[[FOR_BODY31]] ; CHECK: [[VECTOR_LATCH]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -113,7 +113,7 @@ define void @foo_i64(i64 %n) { ; CHECK-NEXT: br i1 [[TMP5]], label %[[VECTOR_LATCH]], label %[[FOR_BODY31]] ; CHECK: [[VECTOR_LATCH]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8 ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll index 1dcd665817196..f67a3d9be408a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll @@ -320,7 +320,7 @@ define void @srem_sdiv_without_tail_folding(i32 %d.0, i32 %d.1, ptr %dst, i32 %e ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE12]] ; CHECK: [[PRED_STORE_CONTINUE12]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP42]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll index 2521ece2eea06..e338b828d2520 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll @@ -27,8 +27,8 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0 ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv2i32() ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[IDX]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP8]], splat (i32 1) -; CHECK-NEXT: [[INDUCTION:%.*]] = add [[DOTSPLAT]], [[TMP10]] +; CHECK-NEXT: [[TMP9:%.*]] = mul nsw [[TMP8]], splat (i32 1) +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw [[DOTSPLAT]], [[TMP9]] ; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector [[DOTSPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -42,7 +42,7 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i64, ptr [[B:%.*]], i32 [[OFFSET_IDX]] ; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP18]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT2]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[DOTSPLAT2]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll b/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll index 1213d974e75ef..f8be8d5b62031 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll @@ -43,7 +43,7 @@ define i32 @foo(i32 noundef %n, i32 noundef %lag, i32 noundef %shift) vscale_ran ; SC_SVE-NEXT: [[TMP16:%.*]] = shl <4 x i32> [[TMP15]], [[BROADCAST_SPLAT]] ; SC_SVE-NEXT: [[TMP17]] = add <4 x i32> [[TMP16]], [[VEC_PHI]] ; SC_SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; SC_SVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; SC_SVE-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; SC_SVE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SC_SVE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SC_SVE: middle.block: @@ -114,7 +114,7 @@ define i32 @foo(i32 noundef %n, i32 noundef %lag, i32 noundef %shift) vscale_ran ; NO_SC_SVE-NEXT: [[TMP16:%.*]] = shl <8 x i32> [[TMP15]], [[BROADCAST_SPLAT]] ; NO_SC_SVE-NEXT: [[TMP17]] = add <8 x i32> [[TMP16]], [[VEC_PHI]] ; NO_SC_SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; NO_SC_SVE-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) +; NO_SC_SVE-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i32> [[VEC_IND]], splat (i32 8) ; NO_SC_SVE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO_SC_SVE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NO_SC_SVE: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll index 3c0455938be80..337c097c85712 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll @@ -36,7 +36,7 @@ define void @cond_ind64(ptr noalias nocapture %a, ptr noalias nocapture readonly ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_LOAD]], ptr align 4 [[TMP11]], [[TMP9]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index 28d2a278de498..e90f8d09fc7ab 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -106,7 +106,7 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[D:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP3:%.*]] = shl [[TMP2]], splat (i64 1) +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw [[TMP2]], splat (i64 1) ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer @@ -128,7 +128,7 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 { ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv8i32( [[TMP10]], [[TMP12]]) ; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP14]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -190,7 +190,7 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[D:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP3:%.*]] = shl [[TMP2]], splat (i64 1) +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw [[TMP2]], splat (i64 1) ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer @@ -214,7 +214,7 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 { ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, ptr @CD_i16, [[TMP9]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0( [[TMP14]], align 2 [[TMP15]], splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -291,7 +291,7 @@ define i32 @test_struct_load6(ptr %S) #1 { ; CHECK-NEXT: [[TMP15:%.*]] = add [[TMP14]], [[WIDE_MASKED_GATHER5]] ; CHECK-NEXT: [[TMP16]] = sub [[TMP12]], [[TMP15]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -391,7 +391,7 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv8i32( [[REVERSE2]], [[REVERSE3]]) ; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP19]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -595,7 +595,7 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0( [[TMP4]], align 8 [[TMP5]], splat (i1 true)) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0( [[TMP7]], align 8 [[TMP6]], splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: @@ -821,7 +821,7 @@ define void @PR27626_0(ptr %p, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP15]], align 4 [[TMP13]], splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: @@ -894,7 +894,7 @@ define i32 @PR27626_1(ptr %p, i64 %n) #1 { ; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { , } [[STRIDED_VEC2]], 0 ; CHECK-NEXT: [[TMP17]] = add [[TMP16]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: @@ -968,7 +968,7 @@ define void @PR27626_2(ptr %p, i64 %n, i32 %z) #1 { ; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP15]], align 4 [[TMP14]], splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: @@ -1044,7 +1044,7 @@ define i32 @PR27626_3(ptr %p, i64 %n, i32 %z) #1 { ; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { , } [[STRIDED_VEC2]], 0 ; CHECK-NEXT: [[TMP18]] = add [[TMP17]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK: middle.block: @@ -1111,7 +1111,7 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP9:%.*]] = shl [[TMP10]], splat (i64 1) +; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw [[TMP10]], splat (i64 1) ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP6]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer @@ -1125,7 +1125,7 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv8i32( [[BROADCAST_SPLAT2]], [[BROADCAST_SPLAT4]]) ; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[P]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; CHECK: middle.block: @@ -1188,8 +1188,8 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP21:%.*]] = shl [[TMP10]], splat (i64 1) -; CHECK-NEXT: [[INDUCTION:%.*]] = add [[TMP21]], splat (i64 3) +; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw [[TMP10]], splat (i64 1) +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw [[TMP19]], splat (i64 3) ; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP7]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer @@ -1206,7 +1206,7 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT2]], align 4 [[TMP17]], splat (i1 true)) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT4]], align 4 [[TMP15]], splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; CHECK: middle.block: @@ -1273,7 +1273,7 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 { ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], [[DOTNOT]] ; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1 ; CHECK-NEXT: [[TMP14:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP15:%.*]] = shl [[TMP14]], splat (i64 1) +; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw [[TMP14]], splat (i64 1) ; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP9]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP17]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer @@ -1301,7 +1301,7 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 { ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: store [[TMP27]], ptr [[TMP28]], align 4, !alias.scope [[META37:![0-9]+]], !noalias [[META34]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]] ; CHECK: middle.block: @@ -1394,7 +1394,7 @@ define void @interleave_deinterleave_factor3(ptr writeonly noalias %dst, ptr rea ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw i8, [[TMP10]], i64 8 ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP17]], align 4 [[TMP25]], splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] ; CHECK: middle.block: @@ -1590,7 +1590,7 @@ define void @interleave_deinterleave_reverse(ptr noalias nocapture readonly %A, ; CHECK-NEXT: [[INTERLEAVED_VEC11:%.*]] = call @llvm.vector.interleave4.nxv16i32( [[REVERSE6]], [[REVERSE7]], [[REVERSE8]], [[REVERSE9]]) ; CHECK-NEXT: store [[INTERLEAVED_VEC11]], ptr [[TMP26]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll index 02cc499f18827..82dff2f8aa2f7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll @@ -60,7 +60,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK3:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP4]], [[TMP4]]) ; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv32i8.p0( [[INTERLEAVED_VEC]], ptr align 1 [[TMP12]], [[INTERLEAVED_MASK3]]) ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP2]] -; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] +; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT2]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SCALAR_TAIL_FOLDING: middle.block: @@ -198,7 +198,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP9]] ; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( splat (i8 2), align 1 [[TMP10]], [[TMP7]]) ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP2]] -; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] +; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT2]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SCALAR_TAIL_FOLDING: middle.block: @@ -328,7 +328,7 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP10]] ; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( splat (i8 2), align 1 [[TMP11]], [[TMP8]]) ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP2]] -; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT4]] +; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT4]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; SCALAR_TAIL_FOLDING: middle.block: @@ -483,7 +483,7 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p, ; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK3:%.*]] = call @llvm.vector.interleave4.nxv64i1( [[TMP4]], [[TMP4]], [[TMP4]], [[TMP4]]) ; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv64i8.p0( [[INTERLEAVED_VEC]], ptr align 1 [[TMP17]], [[INTERLEAVED_MASK3]]) ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP2]] -; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] +; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT2]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SCALAR_TAIL_FOLDING: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index 945d808d3fa3f..8cc9e431e6214 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -164,9 +164,9 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[TMP10]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[TMP2]]) ; CHECK-NEXT: [[TMP13:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP15:%.*]] = mul [[TMP13]], splat (i64 4) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP15]] -; CHECK-NEXT: [[TMP18:%.*]] = mul i64 4, [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = mul nsw [[TMP13]], splat (i64 4) +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw zeroinitializer, [[TMP14]] +; CHECK-NEXT: [[TMP18:%.*]] = mul nsw i64 4, [[TMP4]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP18]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -182,7 +182,7 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP12]]) ; CHECK-NEXT: [[TMP21:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP22:%.*]] = xor i1 [[TMP21]], true -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll index baf050c7facee..0617d2937f824 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll @@ -90,7 +90,7 @@ define void @many_deps(ptr noalias %buckets, ptr %array, ptr %indices, ptr %othe ; NORMAL_DEP_LIMIT-NEXT: [[TMP15:%.*]] = add [[WIDE_LOAD10]], [[VEC_IND]] ; NORMAL_DEP_LIMIT-NEXT: store [[TMP15]], ptr [[TMP14]], align 4, !alias.scope [[META7]], !noalias [[META0]] ; NORMAL_DEP_LIMIT-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP8]] -; NORMAL_DEP_LIMIT-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; NORMAL_DEP_LIMIT-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; NORMAL_DEP_LIMIT-NEXT: [[TMP16:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] ; NORMAL_DEP_LIMIT-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; NORMAL_DEP_LIMIT: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll index 871d9be609bd7..ca4faf4a0a1c9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll @@ -625,7 +625,7 @@ define void @simple_histogram_rtdepcheck(ptr noalias %buckets, ptr %array, ptr % ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[INDEX]] ; CHECK-NEXT: store [[VEC_IND]], ptr [[TMP15]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll index 0b0e2d4154cb6..9f62c7dcda65a 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll @@ -351,8 +351,8 @@ define void @test_stride_noninvar_4i32(ptr readonly %data, ptr noalias nocapture ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[INDEX]] ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) -; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], splat (i32 32) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT3]] = add nuw nsw <4 x i32> [[VEC_IND2]], splat (i32 32) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: @@ -452,9 +452,9 @@ define void @test_stride_noninvar3_4i32(ptr readonly %data, ptr noalias nocaptur ; CHECK-NEXT: [[IND_END:%.*]] = add i32 3, [[TMP0]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> , [[DOTSPLAT]] -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> splat (i32 3), [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[X]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw <4 x i32> , [[DOTSPLAT]] +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i32> splat (i32 3), [[TMP3]] +; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[X]], 4 ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0 ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -470,8 +470,8 @@ define void @test_stride_noninvar3_4i32(ptr readonly %data, ptr noalias nocaptur ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[INDEX]] ; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) -; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <4 x i32> [[VEC_IND4]], [[DOTSPLAT3]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT5]] = add nuw nsw <4 x i32> [[VEC_IND4]], [[DOTSPLAT3]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll index 6ea075f76aed4..318885fc7164e 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll @@ -194,7 +194,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT: [[VECTOR_BODY]]: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE35:.*]] ] ; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <16 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE35]] ] -; DEFAULT-NEXT: [[VEC_IND1:%.*]] = phi <16 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], %[[PRED_STORE_CONTINUE35]] ] +; DEFAULT-NEXT: [[VEC_IND1:%.*]] = phi <16 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT36:%.*]], %[[PRED_STORE_CONTINUE35]] ] ; DEFAULT-NEXT: [[TMP0:%.*]] = icmp ule <16 x i8> [[VEC_IND]], splat (i8 14) ; DEFAULT-NEXT: [[TMP1:%.*]] = mul <16 x i8> [[BROADCAST_SPLAT]], [[VEC_IND1]] ; DEFAULT-NEXT: [[TMP2:%.*]] = lshr <16 x i8> [[VEC_IND1]], splat (i8 1) @@ -349,7 +349,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT: [[PRED_STORE_CONTINUE35]]: ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <16 x i8> [[VEC_IND]], splat (i8 16) -; DEFAULT-NEXT: [[VEC_IND_NEXT2]] = add <16 x i8> [[VEC_IND1]], splat (i8 16) +; DEFAULT-NEXT: [[VEC_IND_NEXT36]] = add nuw nsw <16 x i8> [[VEC_IND1]], splat (i8 16) ; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll index 4af40b711726d..9ea95658818fe 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll @@ -508,7 +508,7 @@ define dso_local void @select_not_allowed(ptr noalias nocapture %A, ptr noalias ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] ; CHECK-NEXT: store <4 x i32> [[WIDE_MASKED_GATHER]], ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll index 2087218bf3ea3..adf443b74acf1 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll @@ -14,21 +14,21 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 { ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP11:%.*]] = call @llvm.stepvector.nxv8i64() -; CHECK-NEXT: [[TMP13:%.*]] = mul [[TMP11]], splat (i64 3) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]] +; CHECK-NEXT: [[TMP4:%.*]] = mul nsw [[TMP11]], splat (i64 3) +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw zeroinitializer, [[TMP4]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) ; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[TMP12]] to i64 -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP17]] +; CHECK-NEXT: [[TMP16:%.*]] = mul nsw i64 3, [[TMP17]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP20]], splat (i1 true), i32 [[TMP12]]) ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -69,21 +69,21 @@ define void @block_with_dead_inst_2(ptr %src) #0 { ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv8i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP5]], splat (i64 3) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; CHECK-NEXT: [[TMP1:%.*]] = mul nsw [[TMP5]], splat (i64 3) +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw zeroinitializer, [[TMP1]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 333, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) ; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP9]] to i64 -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP7]] +; CHECK-NEXT: [[TMP13:%.*]] = mul nsw i64 3, [[TMP7]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP13]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP10]], splat (i1 true), i32 [[TMP9]]) ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -124,21 +124,21 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 { ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv8i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP5]], splat (i64 3) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; CHECK-NEXT: [[TMP1:%.*]] = mul nsw [[TMP5]], splat (i64 3) +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw zeroinitializer, [[TMP1]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 333, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) ; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP9]] to i64 -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP7]] +; CHECK-NEXT: [[TMP13:%.*]] = mul nsw i64 3, [[TMP7]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP13]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP10]], splat (i1 true), i32 [[TMP9]]) ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -186,21 +186,21 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 { ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP11:%.*]] = call @llvm.stepvector.nxv8i64() -; CHECK-NEXT: [[TMP13:%.*]] = mul [[TMP11]], splat (i64 3) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]] +; CHECK-NEXT: [[TMP4:%.*]] = mul nsw [[TMP11]], splat (i64 3) +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw zeroinitializer, [[TMP4]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) ; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[TMP12]] to i64 -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP17]] +; CHECK-NEXT: [[TMP16:%.*]] = mul nsw i64 3, [[TMP17]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP20]], splat (i1 true), i32 [[TMP12]]) ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -248,21 +248,21 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 { ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv8i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP5]], splat (i64 3) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; CHECK-NEXT: [[TMP1:%.*]] = mul nsw [[TMP5]], splat (i64 3) +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw zeroinitializer, [[TMP1]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 333, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) ; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP9]] to i64 -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP7]] +; CHECK-NEXT: [[TMP13:%.*]] = mul nsw i64 3, [[TMP7]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP13]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP10]], splat (i1 true), i32 [[TMP9]]) ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -319,15 +319,15 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 % ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = xor [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: [[TMP11:%.*]] = call @llvm.stepvector.nxv8i64() -; CHECK-NEXT: [[TMP13:%.*]] = mul [[TMP11]], splat (i64 3) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]] +; CHECK-NEXT: [[TMP5:%.*]] = mul nsw [[TMP11]], splat (i64 3) +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw zeroinitializer, [[TMP5]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) ; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP27]] to i64 -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = mul nsw i64 3, [[TMP12]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] @@ -340,7 +340,7 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 % ; CHECK-NEXT: [[TMP24:%.*]] = or [[TMP22]], [[TMP23]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP20]], [[TMP24]], i32 [[TMP27]]) ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll index f25b86d3b20c2..23df6d8ccf6ba 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll @@ -354,8 +354,8 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul [[TMP9]], splat (i64 2) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]] +; CHECK-NEXT: [[TMP4:%.*]] = mul nsw [[TMP9]], splat (i64 2) +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw zeroinitializer, [[TMP4]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -363,7 +363,7 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64 -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP16]] +; CHECK-NEXT: [[TMP12:%.*]] = mul nsw i64 2, [[TMP16]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[EVL_BASED_IV]], 2 @@ -378,7 +378,7 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s ; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[TMP18]], align 4 [[TMP19]], splat (i1 true), i32 [[TMP10]]) ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll index 21272cb72f4d6..69d83db49fd18 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll @@ -11,8 +11,8 @@ define void @test_wide_integer_induction(ptr noalias %a, i64 %N) { ; CHECK-NEXT: br label [[ENTRY:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv2i64() -; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP9]], splat (i64 1) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]] +; CHECK-NEXT: [[TMP1:%.*]] = mul [[TMP9]], splat (i64 1) +; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP1]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll index 5c78cfd6daded..b62d6da2f2e2d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll @@ -44,7 +44,7 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali ; SCALAR_EPILOGUE-NEXT: [[INTERLEAVED_MASK3:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP4]], [[TMP4]]) ; SCALAR_EPILOGUE-NEXT: call void @llvm.masked.store.nxv32i8.p0( [[INTERLEAVED_VEC]], ptr align 1 [[TMP12]], [[INTERLEAVED_MASK3]]) ; SCALAR_EPILOGUE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP2]] -; SCALAR_EPILOGUE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] +; SCALAR_EPILOGUE-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT2]] ; SCALAR_EPILOGUE-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SCALAR_EPILOGUE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SCALAR_EPILOGUE: middle.block: @@ -222,7 +222,7 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali ; SCALAR_EPILOGUE-NEXT: [[INTERLEAVED_MASK3:%.*]] = call @llvm.vector.interleave4.nxv64i1( [[TMP4]], [[TMP4]], [[TMP4]], [[TMP4]]) ; SCALAR_EPILOGUE-NEXT: call void @llvm.masked.store.nxv64i8.p0( [[INTERLEAVED_VEC]], ptr align 1 [[TMP17]], [[INTERLEAVED_MASK3]]) ; SCALAR_EPILOGUE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP2]] -; SCALAR_EPILOGUE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] +; SCALAR_EPILOGUE-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT2]] ; SCALAR_EPILOGUE-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SCALAR_EPILOGUE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SCALAR_EPILOGUE: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-store-with-gap.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-store-with-gap.ll index f36919f98dd00..b436a46842eab 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-store-with-gap.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-store-with-gap.ll @@ -22,7 +22,7 @@ define void @store_factor_2_with_tail_gap(i64 %n, ptr %a) { ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i64> [[TMP2]], <32 x i64> poison, <32 x i32> ; CHECK-NEXT: call void @llvm.masked.store.v32i64.p0(<32 x i64> [[INTERLEAVED_VEC]], ptr align 8 [[TMP1]], <32 x i1> ) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <16 x i64> [[VEC_IND]], splat (i64 16) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll index 06b47aa6551a0..77ee3c08329d5 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll @@ -16,8 +16,8 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) { ; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 ; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() -; VLENUNK-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) -; VLENUNK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; VLENUNK-NEXT: [[TMP1:%.*]] = mul [[TMP6]], splat (i64 1) +; VLENUNK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP1]] ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll index 1cbec47d72203..f49f9284a1e93 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll @@ -46,15 +46,15 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; RV32: vector.ph: ; RV32-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i64() -; RV32-NEXT: [[TMP9:%.*]] = mul [[TMP7]], splat (i64 16) -; RV32-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP9]] +; RV32-NEXT: [[TMP9:%.*]] = mul nuw nsw [[TMP7]], splat (i64 16) +; RV32-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP9]] ; RV32-NEXT: br label [[VECTOR_BODY:%.*]] ; RV32: vector.body: ; RV32-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; RV32-NEXT: [[AVL:%.*]] = phi i64 [ 625, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; RV32-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) ; RV32-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64 -; RV32-NEXT: [[TMP11:%.*]] = mul i64 16, [[TMP8]] +; RV32-NEXT: [[TMP11:%.*]] = mul nuw nsw i64 16, [[TMP8]] ; RV32-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; RV32-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; RV32-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], [[VEC_IND]] @@ -68,7 +68,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] ; RV32-NEXT: call void @llvm.vp.scatter.nxv2f64.nxv2p0( [[TMP18]], align 8 [[TMP19]], [[TMP14]], i32 [[TMP10]]), !alias.scope [[META3]], !noalias [[META5]] ; RV32-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] -; RV32-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; RV32-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; RV32-NEXT: [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; RV32-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; RV32: middle.block: @@ -115,15 +115,15 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV64-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; RV64: vector.ph: ; RV64-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i64() -; RV64-NEXT: [[TMP9:%.*]] = mul [[TMP7]], splat (i64 16) -; RV64-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP9]] +; RV64-NEXT: [[TMP1:%.*]] = mul nuw nsw [[TMP7]], splat (i64 16) +; RV64-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP1]] ; RV64-NEXT: br label [[VECTOR_BODY:%.*]] ; RV64: vector.body: ; RV64-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; RV64-NEXT: [[AVL:%.*]] = phi i64 [ 625, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; RV64-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) ; RV64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64 -; RV64-NEXT: [[TMP11:%.*]] = mul i64 16, [[TMP8]] +; RV64-NEXT: [[TMP11:%.*]] = mul nuw nsw i64 16, [[TMP8]] ; RV64-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; RV64-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; RV64-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], [[VEC_IND]] @@ -137,7 +137,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV64-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] ; RV64-NEXT: call void @llvm.vp.scatter.nxv2f64.nxv2p0( [[TMP18]], align 8 [[TMP19]], [[TMP14]], i32 [[TMP10]]), !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] ; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] -; RV64-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; RV64-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; RV64-NEXT: [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; RV64-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; RV64: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll index 8e562a97d51cf..02c363bb54457 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll @@ -205,7 +205,7 @@ define void @safe_load_store_distance_not_pow_of_2(i64 %N) { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr [10 x [12 x i16]], ptr @a, i64 0, i64 8, <8 x i64> [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> zeroinitializer, <8 x ptr> align 2 [[TMP7]], <8 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 24) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <8 x i64> [[VEC_IND]], splat (i64 24) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index 723b5e9cc280d..414e5d9295554 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -115,15 +115,15 @@ define void @single_constant_stride_int_iv(ptr %p) { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 64) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw [[TMP6]], splat (i64 64) +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP1]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP7]] to i64 -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 64, [[TMP11]] +; CHECK-NEXT: [[TMP9:%.*]] = mul nuw nsw i64 64, [[TMP11]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[P:%.*]], [[VEC_IND]] @@ -131,7 +131,7 @@ define void @single_constant_stride_int_iv(ptr %p) { ; CHECK-NEXT: [[TMP13:%.*]] = add [[WIDE_MASKED_GATHER]], splat (i32 1) ; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[TMP13]], align 4 [[TMP12]], splat (i1 true), i32 [[TMP7]]) ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: @@ -156,8 +156,8 @@ define void @single_constant_stride_int_iv(ptr %p) { ; CHECK-UF2-NEXT: [[TMP5:%.*]] = mul i64 [[N_VEC]], 64 ; CHECK-UF2-NEXT: [[TMP6:%.*]] = mul [[BROADCAST_SPLAT]], splat (i64 64) ; CHECK-UF2-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-UF2-NEXT: [[TMP8:%.*]] = mul [[TMP7]], splat (i64 64) -; CHECK-UF2-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; CHECK-UF2-NEXT: [[TMP8:%.*]] = mul nuw nsw [[TMP7]], splat (i64 64) +; CHECK-UF2-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP8]] ; CHECK-UF2-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UF2: vector.body: ; CHECK-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -172,7 +172,7 @@ define void @single_constant_stride_int_iv(ptr %p) { ; CHECK-UF2-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP11]], align 4 [[TMP9]], splat (i1 true)) ; CHECK-UF2-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP12]], align 4 [[TMP10]], splat (i1 true)) ; CHECK-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] -; CHECK-UF2-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[TMP6]] +; CHECK-UF2-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[STEP_ADD]], [[TMP6]] ; CHECK-UF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-UF2: middle.block: @@ -1339,8 +1339,8 @@ define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) { ; NOSTRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]] ; NOSTRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() -; NOSTRIDED-UF2-NEXT: [[TMP6:%.*]] = mul [[TMP5]], splat (i64 1) -; NOSTRIDED-UF2-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] +; NOSTRIDED-UF2-NEXT: [[TMP6:%.*]] = mul nuw nsw [[TMP5]], splat (i64 1) +; NOSTRIDED-UF2-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP6]] ; NOSTRIDED-UF2-NEXT: br label [[VECTOR_BODY:%.*]] ; NOSTRIDED-UF2: vector.body: ; NOSTRIDED-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1357,7 +1357,7 @@ define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) { ; NOSTRIDED-UF2-NEXT: store [[WIDE_MASKED_GATHER]], ptr [[TMP9]], align 8 ; NOSTRIDED-UF2-NEXT: store [[WIDE_MASKED_GATHER1]], ptr [[TMP12]], align 8 ; NOSTRIDED-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] -; NOSTRIDED-UF2-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[BROADCAST_SPLAT]] +; NOSTRIDED-UF2-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[STEP_ADD]], [[BROADCAST_SPLAT]] ; NOSTRIDED-UF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NOSTRIDED-UF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; NOSTRIDED-UF2: middle.block: @@ -1423,8 +1423,8 @@ define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) { ; STRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]] ; STRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; STRIDED-UF2-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() -; STRIDED-UF2-NEXT: [[TMP6:%.*]] = mul [[TMP5]], splat (i64 1) -; STRIDED-UF2-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] +; STRIDED-UF2-NEXT: [[TMP6:%.*]] = mul nuw nsw [[TMP5]], splat (i64 1) +; STRIDED-UF2-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP6]] ; STRIDED-UF2-NEXT: br label [[VECTOR_BODY:%.*]] ; STRIDED-UF2: vector.body: ; STRIDED-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1441,7 +1441,7 @@ define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) { ; STRIDED-UF2-NEXT: store [[WIDE_MASKED_GATHER]], ptr [[TMP9]], align 8 ; STRIDED-UF2-NEXT: store [[WIDE_MASKED_GATHER1]], ptr [[TMP12]], align 8 ; STRIDED-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] -; STRIDED-UF2-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[BROADCAST_SPLAT]] +; STRIDED-UF2-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[STEP_ADD]], [[BROADCAST_SPLAT]] ; STRIDED-UF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; STRIDED-UF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; STRIDED-UF2: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll index 8ab0f6f4c14f1..317bcde2f4670 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll @@ -1184,8 +1184,8 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: br label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv2i64() -; IF-EVL-NEXT: [[TMP10:%.*]] = mul [[TMP9]], splat (i64 1) -; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]] +; IF-EVL-NEXT: [[TMP1:%.*]] = mul [[TMP9]], splat (i64 1) +; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP1]] ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] ; IF-EVL: [[VECTOR_BODY]]: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -1222,8 +1222,8 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() -; NO-VP-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 1) -; NO-VP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw nsw [[TMP6]], splat (i64 1) +; NO-VP-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP5]] ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 ; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1235,7 +1235,7 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; NO-VP-NEXT: store [[TMP10]], ptr [[TMP11]], align 8 ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; NO-VP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; NO-VP-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]] ; NO-VP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll index 34a82757eccc0..c7003560721bc 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll @@ -380,8 +380,8 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP: vector.ph: ; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 ; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i32() -; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = mul [[TMP10]], splat (i32 1) -; IF-EVL-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]] +; IF-EVL-OUTLOOP-NEXT: [[TMP2:%.*]] = mul nuw nsw [[TMP10]], splat (i32 1) +; IF-EVL-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP2]] ; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL-OUTLOOP: vector.body: ; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT1:%.*]], [[VECTOR_BODY]] ] @@ -400,7 +400,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[TMP20:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT1]] = add i64 [[TMP20]], [[EVL_BASED_IV1]] ; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]] -; IF-EVL-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; IF-EVL-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]] ; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL-OUTLOOP: middle.block: @@ -415,8 +415,8 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: br label [[VECTOR_PH:%.*]] ; IF-EVL-INLOOP: vector.ph: ; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i32() -; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = mul [[TMP9]], splat (i32 1) -; IF-EVL-INLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]] +; IF-EVL-INLOOP-NEXT: [[TMP1:%.*]] = mul nuw nsw [[TMP9]], splat (i32 1) +; IF-EVL-INLOOP-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP1]] ; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL-INLOOP: vector.body: ; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT1:%.*]], [[VECTOR_BODY]] ] @@ -435,7 +435,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT1]] = add i64 [[TMP19]], [[EVL_BASED_IV1]] ; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]] -; IF-EVL-INLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; IF-EVL-INLOOP-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]] ; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-INLOOP-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL-INLOOP: middle.block: @@ -457,8 +457,8 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 ; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i32() -; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i32 1) -; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]] +; NO-VP-OUTLOOP-NEXT: [[TMP8:%.*]] = mul nuw nsw [[TMP12]], splat (i32 1) +; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP8]] ; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP7]] to i32 ; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP16]], i64 0 ; NO-VP-OUTLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer @@ -473,7 +473,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[TMP22:%.*]] = select [[TMP27]], [[WIDE_MASKED_LOAD]], zeroinitializer ; NO-VP-OUTLOOP-NEXT: [[TMP23]] = add [[TMP22]], [[VEC_PHI]] ; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; NO-VP-OUTLOOP-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-OUTLOOP-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; NO-VP-OUTLOOP: middle.block: @@ -513,8 +513,8 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-INLOOP-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i32() -; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i32 1) -; NO-VP-INLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; NO-VP-INLOOP-NEXT: [[TMP5:%.*]] = mul nuw nsw [[TMP6]], splat (i32 1) +; NO-VP-INLOOP-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP5]] ; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP3]] to i32 ; NO-VP-INLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP10]], i64 0 ; NO-VP-INLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer @@ -530,7 +530,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP16]]) ; NO-VP-INLOOP-NEXT: [[TMP18]] = add i32 [[VEC_PHI]], [[TMP17]] ; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; NO-VP-INLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-INLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; NO-VP-INLOOP: middle.block: @@ -584,8 +584,8 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP: vector.ph: ; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 ; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i32() -; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = mul [[TMP10]], splat (i32 1) -; IF-EVL-OUTLOOP-NEXT: [[INDUCTION1:%.*]] = add zeroinitializer, [[TMP13]] +; IF-EVL-OUTLOOP-NEXT: [[TMP2:%.*]] = mul nuw nsw [[TMP10]], splat (i32 1) +; IF-EVL-OUTLOOP-NEXT: [[INDUCTION1:%.*]] = add nuw nsw zeroinitializer, [[TMP2]] ; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL-OUTLOOP: vector.body: ; IF-EVL-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -604,7 +604,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[TMP25:%.*]] = zext i32 [[TMP14]] to i64 ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP25]], [[IV]] ; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP25]] -; IF-EVL-OUTLOOP-NEXT: [[VEC_IND_NEXT7]] = add [[VEC_IND2]], [[BROADCAST_SPLAT2]] +; IF-EVL-OUTLOOP-NEXT: [[VEC_IND_NEXT7]] = add nuw nsw [[VEC_IND2]], [[BROADCAST_SPLAT2]] ; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL-OUTLOOP: middle.block: @@ -619,8 +619,8 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: br label [[VECTOR_PH:%.*]] ; IF-EVL-INLOOP: vector.ph: ; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i32() -; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = mul [[TMP9]], splat (i32 1) -; IF-EVL-INLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]] +; IF-EVL-INLOOP-NEXT: [[TMP1:%.*]] = mul nuw nsw [[TMP9]], splat (i32 1) +; IF-EVL-INLOOP-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP1]] ; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL-INLOOP: vector.body: ; IF-EVL-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -638,7 +638,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[IV]] ; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]] -; IF-EVL-INLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; IF-EVL-INLOOP-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]] ; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-INLOOP-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL-INLOOP: middle.block: @@ -660,8 +660,8 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 ; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i32() -; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i32 1) -; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]] +; NO-VP-OUTLOOP-NEXT: [[TMP8:%.*]] = mul nuw nsw [[TMP12]], splat (i32 1) +; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP8]] ; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP7]] to i32 ; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP16]], i64 0 ; NO-VP-OUTLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer @@ -676,7 +676,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[TMP22:%.*]] = add [[VEC_PHI]], [[WIDE_MASKED_LOAD]] ; NO-VP-OUTLOOP-NEXT: [[PREDPHI]] = select [[TMP28]], [[TMP22]], [[VEC_PHI]] ; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; NO-VP-OUTLOOP-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-OUTLOOP-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; NO-VP-OUTLOOP: middle.block: @@ -720,8 +720,8 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-INLOOP-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i32() -; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i32 1) -; NO-VP-INLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; NO-VP-INLOOP-NEXT: [[TMP5:%.*]] = mul nuw nsw [[TMP6]], splat (i32 1) +; NO-VP-INLOOP-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP5]] ; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP3]] to i32 ; NO-VP-INLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP10]], i64 0 ; NO-VP-INLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer @@ -737,7 +737,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP16]]) ; NO-VP-INLOOP-NEXT: [[TMP18]] = add i32 [[VEC_PHI]], [[TMP17]] ; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; NO-VP-INLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-INLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; NO-VP-INLOOP: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll index c7ba826295de8..a3bec999425a3 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll @@ -545,8 +545,8 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) { ; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP18]], 2 ; IF-EVL-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32 ; IF-EVL-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() -; IF-EVL-NEXT: [[TMP12:%.*]] = mul [[TMP6]], splat (i64 1) -; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP12]] +; IF-EVL-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) +; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] ; IF-EVL-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() ; IF-EVL-NEXT: [[TMP19:%.*]] = mul nuw i32 [[TMP13]], 2 ; IF-EVL-NEXT: [[TMP10:%.*]] = sub i32 [[TMP19]], 1 @@ -589,8 +589,8 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) { ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() -; NO-VP-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 1) -; NO-VP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw nsw [[TMP6]], splat (i64 1) +; NO-VP-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP5]] ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 ; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; NO-VP-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() @@ -607,7 +607,7 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) { ; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[INDEX]] ; NO-VP-NEXT: store [[TMP13]], ptr [[TMP11]], align 8 ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; NO-VP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; NO-VP-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]] ; NO-VP-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll index a07e031418762..b5662b0bd8d3b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll @@ -119,8 +119,8 @@ define i32 @load_factor_4_with_gap(i64 %n, ptr noalias %a) { ; IF-EVL-NEXT: br label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: ; IF-EVL-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() -; IF-EVL-NEXT: [[TMP3:%.*]] = mul [[TMP2]], splat (i64 1) -; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP3]] +; IF-EVL-NEXT: [[TMP1:%.*]] = mul [[TMP2]], splat (i64 1) +; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP1]] ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -163,8 +163,8 @@ define i32 @load_factor_4_with_gap(i64 %n, ptr noalias %a) { ; NO-VP-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[TMP5]] ; NO-VP-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() -; NO-VP-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 1) -; NO-VP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; NO-VP-NEXT: [[TMP7:%.*]] = mul nuw nsw [[TMP6]], splat (i64 1) +; NO-VP-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP7]] ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 ; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -181,7 +181,7 @@ define i32 @load_factor_4_with_gap(i64 %n, ptr noalias %a) { ; NO-VP-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( align 4 [[TMP12]], splat (i1 true), poison) ; NO-VP-NEXT: [[TMP13]] = add [[TMP11]], [[WIDE_MASKED_GATHER2]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; NO-VP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; NO-VP-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]] ; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; NO-VP: middle.block: @@ -249,8 +249,8 @@ define void @store_factor_4_with_gap(i32 %n, ptr noalias %a) { ; IF-EVL-NEXT: br label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: ; IF-EVL-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv4i32() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul [[TMP4]], splat (i32 1) -; IF-EVL-NEXT: [[INDUCTION1:%.*]] = add zeroinitializer, [[TMP5]] +; IF-EVL-NEXT: [[TMP1:%.*]] = mul [[TMP4]], splat (i32 1) +; IF-EVL-NEXT: [[INDUCTION1:%.*]] = add zeroinitializer, [[TMP1]] ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[VEC_IND2:%.*]] = phi [ [[INDUCTION1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] @@ -285,8 +285,8 @@ define void @store_factor_4_with_gap(i32 %n, ptr noalias %a) { ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP9]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv4i32() -; NO-VP-NEXT: [[TMP8:%.*]] = mul [[TMP7]], splat (i32 1) -; NO-VP-NEXT: [[INDUCTION1:%.*]] = add zeroinitializer, [[TMP8]] +; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw nsw [[TMP7]], splat (i32 1) +; NO-VP-NEXT: [[INDUCTION1:%.*]] = add nuw nsw zeroinitializer, [[TMP5]] ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i32 [[TMP9]], i64 0 ; NO-VP-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -300,7 +300,7 @@ define void @store_factor_4_with_gap(i32 %n, ptr noalias %a) { ; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], [[VEC_IND4]], i32 3 ; NO-VP-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[VEC_IND4]], align 4 [[TMP13]], splat (i1 true)) ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP9]] -; NO-VP-NEXT: [[VEC_IND_NEXT5]] = add [[VEC_IND4]], [[BROADCAST_SPLAT3]] +; NO-VP-NEXT: [[VEC_IND_NEXT5]] = add nuw nsw [[VEC_IND4]], [[BROADCAST_SPLAT3]] ; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; NO-VP: middle.block: @@ -359,8 +359,8 @@ define i32 @load_factor_4_with_tail_gap(i64 %n, ptr noalias %a) { ; IF-EVL-NEXT: br label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: ; IF-EVL-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() -; IF-EVL-NEXT: [[TMP3:%.*]] = mul [[TMP2]], splat (i64 1) -; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP3]] +; IF-EVL-NEXT: [[TMP1:%.*]] = mul [[TMP2]], splat (i64 1) +; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP1]] ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -403,8 +403,8 @@ define i32 @load_factor_4_with_tail_gap(i64 %n, ptr noalias %a) { ; NO-VP-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[TMP5]] ; NO-VP-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() -; NO-VP-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 1) -; NO-VP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; NO-VP-NEXT: [[TMP7:%.*]] = mul nuw nsw [[TMP6]], splat (i64 1) +; NO-VP-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP7]] ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 ; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -421,7 +421,7 @@ define i32 @load_factor_4_with_tail_gap(i64 %n, ptr noalias %a) { ; NO-VP-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( align 4 [[TMP12]], splat (i1 true), poison) ; NO-VP-NEXT: [[TMP13]] = add [[TMP11]], [[WIDE_MASKED_GATHER2]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; NO-VP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; NO-VP-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]] ; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; NO-VP: middle.block: @@ -490,8 +490,8 @@ define void @store_factor_4_with_tail_gap(i32 %n, ptr noalias %a) { ; IF-EVL-NEXT: br label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: ; IF-EVL-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv4i32() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul [[TMP4]], splat (i32 1) -; IF-EVL-NEXT: [[INDUCTION1:%.*]] = add zeroinitializer, [[TMP5]] +; IF-EVL-NEXT: [[TMP1:%.*]] = mul [[TMP4]], splat (i32 1) +; IF-EVL-NEXT: [[INDUCTION1:%.*]] = add zeroinitializer, [[TMP1]] ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[VEC_IND2:%.*]] = phi [ [[INDUCTION1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] @@ -526,8 +526,8 @@ define void @store_factor_4_with_tail_gap(i32 %n, ptr noalias %a) { ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP9]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv4i32() -; NO-VP-NEXT: [[TMP8:%.*]] = mul [[TMP7]], splat (i32 1) -; NO-VP-NEXT: [[INDUCTION1:%.*]] = add zeroinitializer, [[TMP8]] +; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw nsw [[TMP7]], splat (i32 1) +; NO-VP-NEXT: [[INDUCTION1:%.*]] = add nuw nsw zeroinitializer, [[TMP5]] ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i32 [[TMP9]], i64 0 ; NO-VP-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -541,7 +541,7 @@ define void @store_factor_4_with_tail_gap(i32 %n, ptr noalias %a) { ; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], [[VEC_IND4]], i32 2 ; NO-VP-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[VEC_IND4]], align 4 [[TMP13]], splat (i1 true)) ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP9]] -; NO-VP-NEXT: [[VEC_IND_NEXT5]] = add [[VEC_IND4]], [[BROADCAST_SPLAT3]] +; NO-VP-NEXT: [[VEC_IND_NEXT5]] = add nuw nsw [[VEC_IND4]], [[BROADCAST_SPLAT3]] ; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; NO-VP: middle.block: @@ -594,8 +594,8 @@ define i32 @load_factor_4_reverse(i64 %n, ptr noalias %a) { ; IF-EVL-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv4i64() ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[N]], i64 0 ; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; IF-EVL-NEXT: [[TMP5:%.*]] = mul [[TMP4]], splat (i64 -1) -; IF-EVL-NEXT: [[INDUCTION:%.*]] = add [[BROADCAST_SPLAT]], [[TMP5]] +; IF-EVL-NEXT: [[TMP3:%.*]] = mul nsw [[TMP4]], splat (i64 -1) +; IF-EVL-NEXT: [[INDUCTION:%.*]] = add nsw [[BROADCAST_SPLAT]], [[TMP3]] ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -603,7 +603,7 @@ define i32 @load_factor_4_reverse(i64 %n, ptr noalias %a) { ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP1]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 -1, [[TMP7]] +; IF-EVL-NEXT: [[TMP8:%.*]] = mul nsw i64 -1, [[TMP7]] ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x i32], ptr [[A:%.*]], [[VEC_IND]], i32 0 @@ -619,7 +619,7 @@ define i32 @load_factor_4_reverse(i64 %n, ptr noalias %a) { ; IF-EVL-NEXT: [[TMP15:%.*]] = add [[TMP13]], [[WIDE_MASKED_GATHER5]] ; IF-EVL-NEXT: [[TMP16]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP15]], [[VEC_PHI]], i32 [[TMP6]]) ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] -; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] +; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[BROADCAST_SPLAT2]] ; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; IF-EVL: middle.block: @@ -646,9 +646,9 @@ define i32 @load_factor_4_reverse(i64 %n, ptr noalias %a) { ; NO-VP-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv4i64() ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[N]], i64 0 ; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; NO-VP-NEXT: [[TMP8:%.*]] = mul [[TMP7]], splat (i64 -1) -; NO-VP-NEXT: [[INDUCTION:%.*]] = add [[BROADCAST_SPLAT]], [[TMP8]] -; NO-VP-NEXT: [[TMP9:%.*]] = mul i64 -1, [[TMP5]] +; NO-VP-NEXT: [[TMP8:%.*]] = mul nsw [[TMP7]], splat (i64 -1) +; NO-VP-NEXT: [[INDUCTION:%.*]] = add nsw [[BROADCAST_SPLAT]], [[TMP8]] +; NO-VP-NEXT: [[TMP9:%.*]] = mul nsw i64 -1, [[TMP5]] ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 ; NO-VP-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -668,7 +668,7 @@ define i32 @load_factor_4_reverse(i64 %n, ptr noalias %a) { ; NO-VP-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( align 4 [[TMP15]], splat (i1 true), poison) ; NO-VP-NEXT: [[TMP16]] = add [[TMP14]], [[WIDE_MASKED_GATHER5]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] +; NO-VP-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[BROADCAST_SPLAT2]] ; NO-VP-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; NO-VP: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 1e21c753840e9..0375f0a8fd132 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -249,8 +249,8 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() -; SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 1) -; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; SCALABLE-NEXT: [[TMP1:%.*]] = mul [[TMP6]], splat (i64 1) +; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP1]] ; SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]] ; SCALABLE: [[VECTOR_BODY]]: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -298,7 +298,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP5]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD]], splat (i64 4) ; FIXEDLEN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; FIXEDLEN: [[MIDDLE_BLOCK]]: @@ -330,8 +330,8 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv4i64() -; TF-SCALABLE-NEXT: [[TMP6:%.*]] = mul [[TMP5]], splat (i64 1) -; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] +; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul [[TMP5]], splat (i64 1) +; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP1]] ; TF-SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]] ; TF-SCALABLE: [[VECTOR_BODY]]: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -590,8 +590,8 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() -; SCALABLE-NEXT: [[TMP13:%.*]] = mul [[TMP6]], splat (i64 1) -; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]] +; SCALABLE-NEXT: [[TMP1:%.*]] = mul [[TMP6]], splat (i64 1) +; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP1]] ; SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]] ; SCALABLE: [[VECTOR_BODY]]: ; SCALABLE-NEXT: [[TMP10:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -661,8 +661,8 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i64 [[V]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() -; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP5]], splat (i64 1) -; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul [[TMP5]], splat (i64 1) +; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP1]] ; TF-SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]] ; TF-SCALABLE: [[VECTOR_BODY]]: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -712,8 +712,8 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() -; SCALABLE-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) -; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; SCALABLE-NEXT: [[TMP1:%.*]] = mul [[TMP6]], splat (i64 1) +; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP1]] ; SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]] ; SCALABLE: [[VECTOR_BODY]]: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -760,7 +760,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD]], splat (i64 4) ; FIXEDLEN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; FIXEDLEN: [[MIDDLE_BLOCK]]: @@ -793,8 +793,8 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() -; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP5]], splat (i64 1) -; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul [[TMP5]], splat (i64 1) +; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP1]] ; TF-SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]] ; TF-SCALABLE: [[VECTOR_BODY]]: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll index 193b5d4a788dc..69505eb176f50 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll @@ -48,7 +48,7 @@ define void @PR31671(float %x, ptr %d) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = fadd <16 x float> [[STRIDED_VEC2]], [[TMP1]] ; CHECK-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[TMP4]], <16 x ptr> align 4 [[TMP2]], <16 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 80) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <16 x i64> [[VEC_IND]], splat (i64 80) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 6384 ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll index c5f581fad41f5..3165422dcc539 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll @@ -34,7 +34,7 @@ define i1 @fn(ptr %nno) #0 { ; CHECK-NEXT: [[TMP11]] = or <4 x i32> [[PREDPHI]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP11]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 -4) ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll index ed288d2f99a0b..fd76aa779bfb0 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll @@ -31,7 +31,7 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) { ; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[IV_START]] to i32 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP9]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i32> [[DOTSPLAT]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <16 x i32> [[DOTSPLAT]], ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -46,7 +46,7 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) { ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[ARR:%.*]], i64 [[TMP16]] ; CHECK-NEXT: store <16 x i16> [[TMP15]], ptr [[TMP17]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i32> [[VEC_IND]], splat (i32 16) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <16 x i32> [[VEC_IND]], splat (i32 16) ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -55,7 +55,7 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) { ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[IND_END6:%.*]] = add i64 [[IV_START]], [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IV_START]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] @@ -65,7 +65,7 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) { ; CHECK-NEXT: [[TMP20:%.*]] = trunc i64 [[BC_RESUME_VAL]] to i32 ; CHECK-NEXT: [[DOTSPLATINSERT9:%.*]] = insertelement <4 x i32> poison, i32 [[TMP20]], i64 0 ; CHECK-NEXT: [[DOTSPLAT10:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT9]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION11:%.*]] = add <4 x i32> [[DOTSPLAT10]], +; CHECK-NEXT: [[INDUCTION11:%.*]] = add nuw nsw <4 x i32> [[DOTSPLAT10]], ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX8:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] @@ -80,9 +80,9 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) { ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[ARR]], i64 [[TMP27]] ; CHECK-NEXT: store <4 x i16> [[TMP26]], ptr [[TMP28]], align 2 ; CHECK-NEXT: [[INDEX_NEXT15]] = add nuw i64 [[INDEX8]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT13]] = add <4 x i32> [[VEC_IND12]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT13]] = add nuw nsw <4 x i32> [[VEC_IND12]], splat (i32 4) ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC4]] -; CHECK-NEXT: br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC4]] ; CHECK-NEXT: br i1 [[CMP_N16]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -101,7 +101,7 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) { ; CHECK-NEXT: store i16 [[STORE_VAL]], ptr [[ADDR]], align 2 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp sgt i32 [[IV_TRUNC]], 91 -; CHECK-NEXT: br i1 [[LOOP_COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -179,7 +179,7 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l, ; CHECK-NEXT: [[DOTCAST9:%.*]] = trunc i64 [[N_VEC]] to i16 ; CHECK-NEXT: [[IND_END10:%.*]] = mul i16 [[DOTCAST9]], [[TMP0]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF7:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] @@ -208,7 +208,7 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l, ; CHECK-NEXT: [[INDEX_NEXT24]] = add nuw i64 [[INDEX12]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT21]] = add <8 x i16> [[VEC_IND20]], [[BROADCAST_SPLAT23]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT24]], [[N_VEC5]] -; CHECK-NEXT: br i1 [[TMP20]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP20]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N25:%.*]] = icmp eq i64 [[L]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[CMP_N25]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -224,7 +224,7 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l, ; CHECK-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX3]], align 2 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[L]] -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -265,12 +265,12 @@ define i8 @multiple_inductions_start_at_0() { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 128 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i8> [[STEP_ADD_3]], zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <32 x i8> [[STEP_ADD_3]], i32 31 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF11:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 1024, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] @@ -285,7 +285,7 @@ define i8 @multiple_inductions_start_at_0() { ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i32 [[INDEX1]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <4 x i8> [[VEC_IND2]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT3]], 1052 -; CHECK-NEXT: br i1 [[TMP3]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP3]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i8> [[VEC_IND2]], i32 3 ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -300,7 +300,7 @@ define i8 @multiple_inductions_start_at_0() { ; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[IV_2]] to i8 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp ugt i32 [[IV]], 1050 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[RES:%.*]] = phi i8 [ [[TRUNC]], [[LOOP]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ [[TMP4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i8 [[RES]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll index cc84fabd00ecc..69c6ce3a14a17 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll @@ -222,7 +222,7 @@ define i64 @test_pr62954_scalar_epilogue_required(ptr %A, ptr noalias %B, ptr %C ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 ; CHECK-NEXT: store i64 [[TMP2]], ptr [[GEP]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 36 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll index db592f959bace..0bac10a41640e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -134,7 +134,7 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n ; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], <16 x i64> [[VEC_IND]] ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[TMP3]], <16 x ptr> align 4 [[TMP4]], <16 x i1> [[TMP1]]) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16 -; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 256) +; AVX512-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <16 x i64> [[VEC_IND]], splat (i64 256) ; AVX512-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; AVX512-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; AVX512: middle.block: @@ -180,7 +180,7 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE3]] ; FVW2: pred.store.continue3: ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2 -; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32) +; FVW2-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 32) ; FVW2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; FVW2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; FVW2: middle.block: @@ -249,7 +249,7 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) { ; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_OUT:%.*]], ptr [[OUT:%.*]], <16 x i64> [[VEC_IND]], i32 1 ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[TMP3]], <16 x ptr> align 4 [[TMP4]], <16 x i1> [[TMP1]]) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 256) +; AVX512-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <16 x i64> [[VEC_IND]], splat (i64 256) ; AVX512-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; AVX512-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; AVX512: middle.block: @@ -295,7 +295,7 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) { ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE2]] ; FVW2: pred.store.continue2: ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32) +; FVW2-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 32) ; FVW2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; FVW2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; FVW2: middle.block: @@ -351,7 +351,7 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali ; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], <16 x i64> [[VEC_IND]] ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p1(<16 x float> [[TMP3]], <16 x ptr addrspace(1)> align 4 [[TMP4]], <16 x i1> [[TMP1]]) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16 -; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 256) +; AVX512-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <16 x i64> [[VEC_IND]], splat (i64 256) ; AVX512-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; AVX512-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; AVX512: middle.block: @@ -397,7 +397,7 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE3]] ; FVW2: pred.store.continue3: ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2 -; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32) +; FVW2-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 32) ; FVW2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; FVW2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; FVW2: middle.block: @@ -452,7 +452,7 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal ; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], <16 x i64> [[VEC_IND]] ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[TMP3]], <16 x ptr> align 4 [[TMP4]], <16 x i1> [[TMP1]]) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16 -; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 256) +; AVX512-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <16 x i64> [[VEC_IND]], splat (i64 256) ; AVX512-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; AVX512-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; AVX512: middle.block: @@ -498,7 +498,7 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE3]] ; FVW2: pred.store.continue3: ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2 -; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32) +; FVW2-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 32) ; FVW2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; FVW2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; FVW2: middle.block: @@ -553,7 +553,7 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal ; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], <16 x i64> [[VEC_IND]] ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p1(<16 x float> [[TMP3]], <16 x ptr addrspace(1)> align 4 [[TMP4]], <16 x i1> [[TMP1]]) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16 -; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 256) +; AVX512-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <16 x i64> [[VEC_IND]], splat (i64 256) ; AVX512-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; AVX512-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; AVX512: middle.block: @@ -599,7 +599,7 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE3]] ; FVW2: pred.store.continue3: ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2 -; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32) +; FVW2-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 32) ; FVW2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; FVW2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; FVW2: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll index 9240484c6998b..5eeebf2009a62 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll @@ -370,7 +370,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[TMP63]], <32 x i32> poison, <32 x i32> ; CHECK-NEXT: store <32 x i32> [[INTERLEAVED_VEC]], ptr [[TMP56]], align 4, !alias.scope [[META13]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 32) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 32) ; CHECK-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP64]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll index 0678e9eea4c35..b6d26b4ab46be 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll @@ -35,8 +35,8 @@ define void @avoid_sinking_store_across_load(ptr %arr) { ; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[STRIDED_VEC6]], [[STRIDED_VEC5]] ; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP9]], <4 x ptr> align 4 [[TMP4]], <4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) -; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 12) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 12) +; CHECK-NEXT: [[VEC_IND_NEXT3]] = add nuw nsw <4 x i64> [[VEC_IND2]], splat (i64 12) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll index 6558f761142f0..932153a23bdbd 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll @@ -1021,7 +1021,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[A]], <8 x i64> [[VEC_IND]] ; AVX512-NEXT: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> [[TMP5]], <8 x ptr> align 8 [[TMP6]], <8 x i1> [[TMP1]]), !alias.scope [[META29:![0-9]+]], !noalias [[META31:![0-9]+]] ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; AVX512-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 128) +; AVX512-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i64> [[VEC_IND]], splat (i64 128) ; AVX512-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 624 ; AVX512-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; AVX512: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll index 5e1850be132bd..a8c5bb0ee6f3b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll @@ -173,7 +173,7 @@ define void @scev4stride1(ptr noalias nocapture %a, ptr noalias nocapture readon ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]] ; CHECK-NEXT: store <64 x i32> [[WIDE_MASKED_GATHER]], ptr [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <64 x i32> [[VEC_IND]], splat (i32 64) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <64 x i32> [[VEC_IND]], splat (i32 64) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -198,7 +198,7 @@ define void @scev4stride1(ptr noalias nocapture %a, ptr noalias nocapture readon ; AUTOVF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]] ; AUTOVF-NEXT: store <8 x i32> [[WIDE_MASKED_GATHER]], ptr [[TMP3]], align 4 ; AUTOVF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; AUTOVF-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) +; AUTOVF-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i32> [[VEC_IND]], splat (i32 8) ; AUTOVF-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; AUTOVF-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; AUTOVF: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll index 113bb7a7f2aca..4eaadcb93e3de 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll @@ -50,7 +50,7 @@ define void @foo(i32 %n) { ; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_LATCH]], label %[[FOR_BODY31]] ; CHECK: [[VECTOR_LATCH]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll index ff5020cd60138..1350e40c77e66 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll @@ -29,7 +29,7 @@ define void @foo(ptr %ptr, ptr %ptr.2) { ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x i64> [[VEC_IND]], ptr [[TMP6]], align 8, !alias.scope [[META3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 80 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll index e7f56a45ebdc6..34c54de2140cc 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll @@ -38,7 +38,7 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP14]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll b/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll index e888ad3b8eb4e..fb5a5f9c068b9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll @@ -36,7 +36,7 @@ define void @test_pr55096(i64 %c, ptr %p) { ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.continue2: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 340 ; CHECK-NEXT: br i1 [[TMP15]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: exit: diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll index ba7db65d745a9..08855fe9ecba5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll @@ -35,7 +35,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 { ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP4]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr align 8 [[TMP8]], <4 x i1> [[REVERSE]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 -4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF0:![0-9]+]], !llvm.loop [[LOOP1:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll index e405fe7c6f764..9ef4e205a970d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll @@ -59,8 +59,8 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP11]], <16 x i64> [[TMP15]], i64 0 ; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 8), <16 x ptr> align 8 [[TMP16]], <16 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 32) -; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <16 x i64> [[VEC_IND3]], splat (i64 32) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <16 x i64> [[VEC_IND]], splat (i64 32) +; CHECK-NEXT: [[VEC_IND_NEXT3]] = add nuw nsw <16 x i64> [[VEC_IND3]], splat (i64 32) ; CHECK-NEXT: [[TMP63:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP63]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -83,15 +83,15 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[IND_END11:%.*]] = mul i64 [[N_VEC7]], 2 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[DOTSPLAT]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <8 x i64> [[DOTSPLAT]], ; CHECK-NEXT: [[DOTSPLATINSERT17:%.*]] = insertelement <8 x i64> poison, i64 [[BC_RESUME_VAL5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT18:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT17]], <8 x i64> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION19:%.*]] = add <8 x i64> [[DOTSPLAT18]], +; CHECK-NEXT: [[INDUCTION9:%.*]] = add nuw nsw <8 x i64> [[DOTSPLAT18]], ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX14:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT22:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND15:%.*]] = phi <8 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT16:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND20:%.*]] = phi <8 x i64> [ [[INDUCTION19]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT21:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND15:%.*]] = phi <8 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT14:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND20:%.*]] = phi <8 x i64> [ [[INDUCTION9]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT15:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP18:%.*]] = sub nsw <8 x i64> splat (i64 8), [[VEC_IND15]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <8 x i64> [[VEC_IND15]] ; CHECK-NEXT: [[TMP20:%.*]] = add nsw <8 x i64> [[TMP18]], [[VEC_IND20]] @@ -102,8 +102,8 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP19]], <8 x i64> [[TMP23]], i64 0 ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 8), <8 x ptr> align 8 [[TMP24]], <8 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT22]] = add nuw i64 [[INDEX14]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT16]] = add <8 x i64> [[VEC_IND15]], splat (i64 16) -; CHECK-NEXT: [[VEC_IND_NEXT21]] = add <8 x i64> [[VEC_IND20]], splat (i64 16) +; CHECK-NEXT: [[VEC_IND_NEXT14]] = add nuw nsw <8 x i64> [[VEC_IND15]], splat (i64 16) +; CHECK-NEXT: [[VEC_IND_NEXT15]] = add nuw nsw <8 x i64> [[VEC_IND20]], splat (i64 16) ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT22]], [[N_VEC7]] ; CHECK-NEXT: br i1 [[TMP25]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: @@ -134,8 +134,8 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: br label %[[VECTOR_BODY30:.*]] ; CHECK: [[VECTOR_BODY30]]: ; CHECK-NEXT: [[INDEX34:%.*]] = phi i64 [ 0, %[[VECTOR_PH25]] ], [ [[INDEX_NEXT39:%.*]], %[[VECTOR_BODY30]] ] -; CHECK-NEXT: [[VEC_IND35:%.*]] = phi <16 x i64> [ , %[[VECTOR_PH25]] ], [ [[VEC_IND_NEXT36:%.*]], %[[VECTOR_BODY30]] ] -; CHECK-NEXT: [[VEC_IND37:%.*]] = phi <16 x i64> [ , %[[VECTOR_PH25]] ], [ [[VEC_IND_NEXT38:%.*]], %[[VECTOR_BODY30]] ] +; CHECK-NEXT: [[VEC_IND35:%.*]] = phi <16 x i64> [ , %[[VECTOR_PH25]] ], [ [[VEC_IND_NEXT35:%.*]], %[[VECTOR_BODY30]] ] +; CHECK-NEXT: [[VEC_IND37:%.*]] = phi <16 x i64> [ , %[[VECTOR_PH25]] ], [ [[VEC_IND_NEXT36:%.*]], %[[VECTOR_BODY30]] ] ; CHECK-NEXT: [[TMP30:%.*]] = sub nsw <16 x i64> splat (i64 8), [[VEC_IND35]] ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <16 x i64> [[VEC_IND35]] ; CHECK-NEXT: [[TMP32:%.*]] = add nsw <16 x i64> [[TMP30]], [[VEC_IND37]] @@ -148,8 +148,8 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 7), <16 x ptr> align 16 [[TMP33]], <16 x i1> [[BROADCAST_SPLAT]]) ; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 7), <16 x ptr> align 8 [[TMP37]], <16 x i1> [[BROADCAST_SPLAT]]) ; CHECK-NEXT: [[INDEX_NEXT39]] = add nuw i64 [[INDEX34]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT36]] = add <16 x i64> [[VEC_IND35]], splat (i64 32) -; CHECK-NEXT: [[VEC_IND_NEXT38]] = add <16 x i64> [[VEC_IND37]], splat (i64 32) +; CHECK-NEXT: [[VEC_IND_NEXT35]] = add nuw nsw <16 x i64> [[VEC_IND35]], splat (i64 32) +; CHECK-NEXT: [[VEC_IND_NEXT36]] = add nuw nsw <16 x i64> [[VEC_IND37]], splat (i64 32) ; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT39]], [[N_VEC32]] ; CHECK-NEXT: br i1 [[TMP41]], label %[[MIDDLE_BLOCK37:.*]], label %[[VECTOR_BODY30]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK37]]: @@ -175,15 +175,15 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[TMP48:%.*]] = xor <8 x i1> [[BROADCAST_SPLAT73]], splat (i1 true) ; CHECK-NEXT: [[DOTSPLATINSERT62:%.*]] = insertelement <8 x i64> poison, i64 [[BC_RESUME_VAL42]], i64 0 ; CHECK-NEXT: [[DOTSPLAT63:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT62]], <8 x i64> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION64:%.*]] = add <8 x i64> [[DOTSPLAT63]], +; CHECK-NEXT: [[INDUCTION52:%.*]] = add nuw nsw <8 x i64> [[DOTSPLAT63]], ; CHECK-NEXT: [[DOTSPLATINSERT67:%.*]] = insertelement <8 x i64> poison, i64 [[BC_RESUME_VAL44]], i64 0 ; CHECK-NEXT: [[DOTSPLAT68:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT67]], <8 x i64> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION69:%.*]] = add <8 x i64> [[DOTSPLAT68]], +; CHECK-NEXT: [[INDUCTION55:%.*]] = add nuw nsw <8 x i64> [[DOTSPLAT68]], ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY56:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY56]]: ; CHECK-NEXT: [[INDEX61:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL39]], %[[VEC_EPILOG_PH45]] ], [ [[INDEX_NEXT74:%.*]], %[[VEC_EPILOG_VECTOR_BODY56]] ] -; CHECK-NEXT: [[VEC_IND65:%.*]] = phi <8 x i64> [ [[INDUCTION64]], %[[VEC_EPILOG_PH45]] ], [ [[VEC_IND_NEXT66:%.*]], %[[VEC_EPILOG_VECTOR_BODY56]] ] -; CHECK-NEXT: [[VEC_IND70:%.*]] = phi <8 x i64> [ [[INDUCTION69]], %[[VEC_EPILOG_PH45]] ], [ [[VEC_IND_NEXT71:%.*]], %[[VEC_EPILOG_VECTOR_BODY56]] ] +; CHECK-NEXT: [[VEC_IND65:%.*]] = phi <8 x i64> [ [[INDUCTION52]], %[[VEC_EPILOG_PH45]] ], [ [[VEC_IND_NEXT61:%.*]], %[[VEC_EPILOG_VECTOR_BODY56]] ] +; CHECK-NEXT: [[VEC_IND70:%.*]] = phi <8 x i64> [ [[INDUCTION55]], %[[VEC_EPILOG_PH45]] ], [ [[VEC_IND_NEXT62:%.*]], %[[VEC_EPILOG_VECTOR_BODY56]] ] ; CHECK-NEXT: [[TMP44:%.*]] = sub nsw <8 x i64> splat (i64 8), [[VEC_IND65]] ; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <8 x i64> [[VEC_IND65]] ; CHECK-NEXT: [[TMP46:%.*]] = add nsw <8 x i64> [[TMP44]], [[VEC_IND70]] @@ -196,8 +196,8 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 7), <8 x ptr> align 16 [[TMP47]], <8 x i1> [[BROADCAST_SPLAT73]]) ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 7), <8 x ptr> align 8 [[TMP51]], <8 x i1> [[BROADCAST_SPLAT73]]) ; CHECK-NEXT: [[INDEX_NEXT74]] = add nuw i64 [[INDEX61]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT66]] = add <8 x i64> [[VEC_IND65]], splat (i64 16) -; CHECK-NEXT: [[VEC_IND_NEXT71]] = add <8 x i64> [[VEC_IND70]], splat (i64 16) +; CHECK-NEXT: [[VEC_IND_NEXT61]] = add nuw nsw <8 x i64> [[VEC_IND65]], splat (i64 16) +; CHECK-NEXT: [[VEC_IND_NEXT62]] = add nuw nsw <8 x i64> [[VEC_IND70]], splat (i64 16) ; CHECK-NEXT: [[TMP55:%.*]] = icmp eq i64 [[INDEX_NEXT74]], [[N_VEC53]] ; CHECK-NEXT: br i1 [[TMP55]], label %[[VEC_EPILOG_MIDDLE_BLOCK63:.*]], label %[[VEC_EPILOG_VECTOR_BODY56]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK63]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory-gaps.ll b/llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory-gaps.ll index eca70b3af159c..86ac78be5bda9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory-gaps.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory-gaps.ll @@ -29,7 +29,7 @@ define void @load_store_interleave_group_with_gaps(ptr noalias %data, i64 nounde ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[DATA]], <4 x i64> [[TMP5]] ; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[STRIDED_VEC2]], <4 x ptr> align 8 [[TMP6]], <4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll b/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll index efc9a4fa57292..8184cad22ae8b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll @@ -44,7 +44,7 @@ define void @iv.4_used_as_vector_and_first_lane(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[TMP14]], ptr align 4 [[TMP34]], <4 x i1> [[TMP18]]) ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[TMP15]], ptr align 4 [[TMP35]], <4 x i1> [[TMP19]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4) ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll index f9570405ecabc..b5d42a8f71430 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll @@ -39,7 +39,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; DISABLED_MASKED_STRIDED: vector.body: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp samesign ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] @@ -124,7 +124,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP49]], ptr align 1 [[TMP50]], <8 x i1> [[TMP0]]) ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -139,7 +139,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; ENABLED_MASKED_STRIDED: vector.body: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp samesign ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl i32 [[INDEX]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 [[TMP1]] ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> @@ -148,7 +148,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[STRIDED_VEC]], ptr align 1 [[TMP3]], <8 x i1> [[TMP0]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i32> [[VEC_IND]], splat (i32 8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1016 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.body: @@ -212,7 +212,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p ; DISABLED_MASKED_STRIDED: vector.body: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp samesign ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] @@ -297,7 +297,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p ; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP49]], ptr align 1 [[TMP50]], <8 x i1> [[TMP0]]) ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -312,7 +312,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p ; ENABLED_MASKED_STRIDED: vector.body: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp samesign ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl i32 [[INDEX]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 [[TMP1]] ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> @@ -322,7 +322,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[STRIDED_VEC]], ptr align 1 [[TMP4]], <8 x i1> [[TMP0]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i32> [[VEC_IND]], splat (i32 8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: @@ -903,7 +903,7 @@ define dso_local void @unconditional_strided1_optsize(ptr noalias nocapture read ; DISABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: store <8 x i8> [[TMP32]], ptr [[TMP33]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP34]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -1162,7 +1162,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no ; DISABLED_MASKED_STRIDED: vector.body: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE60]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp samesign ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] @@ -1472,7 +1472,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] ; DISABLED_MASKED_STRIDED: pred.store.continue60: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP165]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -1487,7 +1487,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no ; ENABLED_MASKED_STRIDED: vector.body: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp samesign ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl i32 [[INDEX]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 [[TMP1]] ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> @@ -1500,7 +1500,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[INTERLEAVED_VEC]], ptr align 1 [[TMP6]], <16 x i1> [[INTERLEAVED_MASK]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i32> [[VEC_IND]], splat (i32 8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: @@ -1877,7 +1877,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] ; DISABLED_MASKED_STRIDED: pred.store.continue60: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 -8) +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i32> [[VEC_IND]], splat (i32 -8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP165]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -2202,7 +2202,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] ; ENABLED_MASKED_STRIDED: pred.store.continue60: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 -8) +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i32> [[VEC_IND]], splat (i32 -8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP165]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll index 500e60372ac13..cfc588a7f5d89 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll @@ -64,7 +64,7 @@ define dso_local void @test1(ptr noalias nocapture %points, ptr noalias nocaptur ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP26]], ptr [[TMP21]], align 2 ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP27]], ptr [[TMP23]], align 2 ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP28]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -352,7 +352,7 @@ define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE6]] ; DISABLED_MASKED_STRIDED: pred.store.continue6: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP19]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -405,7 +405,7 @@ define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE6]] ; ENABLED_MASKED_STRIDED: pred.store.continue6: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP19]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll index 67fe87a328976..5999a3581e467 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll @@ -339,7 +339,7 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; SINK-GATHER-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP1]], <8 x i32> [[TMP64]], <8 x i32> [[BROADCAST_SPLAT16]] ; SINK-GATHER-NEXT: [[TMP66]] = add <8 x i32> [[VEC_PHI]], [[PREDPHI]] ; SINK-GATHER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; SINK-GATHER-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) +; SINK-GATHER-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i64> [[VEC_IND]], splat (i64 8) ; SINK-GATHER-NEXT: [[TMP67:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SINK-GATHER-NEXT: br i1 [[TMP67]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; SINK-GATHER: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/assume.ll b/llvm/test/Transforms/LoopVectorize/assume.ll index b41ddebc0c362..a9a0b33f542af 100644 --- a/llvm/test/Transforms/LoopVectorize/assume.ll +++ b/llvm/test/Transforms/LoopVectorize/assume.ll @@ -161,7 +161,7 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[TMP7]], align 4 ; CHECK-NEXT: store <2 x float> [[TMP6]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[STEP_ADD]], splat (i64 2) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/check-prof-info.ll b/llvm/test/Transforms/LoopVectorize/check-prof-info.ll index b59ad8481597c..af72f3641a635 100644 --- a/llvm/test/Transforms/LoopVectorize/check-prof-info.ll +++ b/llvm/test/Transforms/LoopVectorize/check-prof-info.ll @@ -17,7 +17,7 @@ define void @_Z3foov() { ; CHECK: vector.ph: ; CHECK: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK: br i1 [[TMP6:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: br i1 [[TMP4:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF0:![0-9]+]], !llvm.loop [[LOOP1:![0-9]+]] ; CHECK: middle.block: ; CHECK: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: @@ -28,7 +28,7 @@ define void @_Z3foov() { ; CHECK-MASKED: vector.ph: ; CHECK-MASKED: br label [[VECTOR_BODY:%.*]] ; CHECK-MASKED: vector.body: -; CHECK-MASKED: br i1 [[TMP18:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-MASKED: br i1 [[TMP16:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF0:![0-9]+]], !llvm.loop [[LOOP1:![0-9]+]] ; CHECK-MASKED: middle.block: ; CHECK-MASKED: br label [[FOR_BODY:%.*]] ; CHECK-MASKED: for.cond.cleanup: @@ -41,15 +41,15 @@ define void @_Z3foov() { ; CHECK-SCALABLE: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-SCALABLE: br label [[VECTOR_BODY:%.*]] ; CHECK-SCALABLE: vector.body: -; CHECK-SCALABLE: [[VEC_IND_NEXT:%.*]] = add [[VEC_IND:%.*]], [[BROADCAST_SPLAT]] -; CHECK-SCALABLE: br i1 [[TMP16:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-SCALABLE: [[VEC_IND_NEXT:%.*]] = add nuw nsw [[VEC_IND:%.*]], [[BROADCAST_SPLAT]] +; CHECK-SCALABLE: br i1 [[TMP11:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK-SCALABLE: middle.block: -; CHECK-SCALABLE: br i1 [[CMP_N:%.*]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]], !prof [[PROF5:![0-9]+]] +; CHECK-SCALABLE: br i1 [[CMP_N:%.*]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]], !prof [[PROF6:![0-9]+]] ; CHECK-SCALABLE: scalar.ph: ; CHECK-SCALABLE: br label [[FOR_BODY:%.*]] ; CHECK-SCALABLE: for.cond.cleanup: ; CHECK-SCALABLE: for.body: -; CHECK-SCALABLE: br i1 [[EXITCOND:%.*]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-SCALABLE: br i1 [[EXITCOND:%.*]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !prof [[PROF7:![0-9]+]], !llvm.loop [[LOOP8:![0-9]+]] ; entry: br label %for.body @@ -80,14 +80,14 @@ define void @_Z3foo2v() { ; CHECK: vector.ph: ; CHECK: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK: br i1 [[TMP6:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: br i1 [[TMP4:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF0]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK: br label [[SCALAR_PH:%.+]] +; CHECK: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: ; CHECK: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK: for.body: -; CHECK: br i1 [[EXITCOND:%.*]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], !prof [[PROF8:![0-9]+]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: br i1 [[EXITCOND:%.*]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]] ; ; CHECK-MASKED-LABEL: @_Z3foo2v( ; CHECK-MASKED: entry: @@ -95,14 +95,14 @@ define void @_Z3foo2v() { ; CHECK-MASKED: vector.ph: ; CHECK-MASKED: br label [[VECTOR_BODY:%.*]] ; CHECK-MASKED: vector.body: -; CHECK-MASKED: br i1 [[TMP18:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-MASKED: br i1 [[TMP16:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF0]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-MASKED: middle.block: -; CHECK-MASKED: br label [[SCALAR_PH:%.+]] +; CHECK-MASKED: br label [[SCALAR_PH:%.*]] ; CHECK-MASKED: scalar.ph: ; CHECK-MASKED: br label [[FOR_BODY:%.*]] ; CHECK-MASKED: for.cond.cleanup: ; CHECK-MASKED: for.body: -; CHECK-MASKED: br i1 [[EXITCOND:%.*]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], !prof [[PROF8:![0-9]+]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-MASKED: br i1 [[EXITCOND:%.*]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]] ; ; CHECK-SCALABLE-LABEL: @_Z3foo2v( ; CHECK-SCALABLE: entry: @@ -112,15 +112,15 @@ define void @_Z3foo2v() { ; CHECK-SCALABLE: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-SCALABLE: br label [[VECTOR_BODY:%.*]] ; CHECK-SCALABLE: vector.body: -; CHECK-SCALABLE: [[VEC_IND_NEXT:%.*]] = add [[VEC_IND:%.*]], [[BROADCAST_SPLAT]] -; CHECK-SCALABLE: br i1 [[TMP16:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-SCALABLE: [[VEC_IND_NEXT:%.*]] = add nuw nsw [[VEC_IND:%.*]], [[BROADCAST_SPLAT]] +; CHECK-SCALABLE: br i1 [[TMP11:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-SCALABLE: middle.block: -; CHECK-SCALABLE: br i1 [[CMP_N:%.*]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]], !prof [[PROF5]] +; CHECK-SCALABLE: br i1 [[CMP_N:%.*]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]], !prof [[PROF6]] ; CHECK-SCALABLE: scalar.ph: ; CHECK-SCALABLE: br label [[FOR_BODY:%.*]] ; CHECK-SCALABLE: for.cond.cleanup: ; CHECK-SCALABLE: for.body: -; CHECK-SCALABLE: br i1 [[EXITCOND:%.*]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-SCALABLE: br i1 [[EXITCOND:%.*]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !prof [[PROF11:![0-9]+]], !llvm.loop [[LOOP12:![0-9]+]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll index 45405511abe90..1fe3962dfd072 100644 --- a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll @@ -1612,7 +1612,7 @@ define void @pr61396_pointer_used_as_both_stored_value_and_pointer_operand_by_st ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 0 ; CHECK-NEXT: store <4 x ptr> [[TMP0]], ptr [[TMP1]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240 ; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -1642,7 +1642,7 @@ define void @pr61396_pointer_used_as_both_stored_value_and_pointer_operand_by_st ; INTER-NEXT: [[TMP1:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 0 ; INTER-NEXT: store <4 x ptr> [[TMP0]], ptr [[TMP1]], align 8 ; INTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; INTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; INTER-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; INTER-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240 ; INTER-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; INTER: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll b/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll index 49eb8b349a274..5d92c127aff93 100644 --- a/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll +++ b/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll @@ -87,7 +87,7 @@ define void @cse_wide_gep(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %n ; CHECK-NEXT: store <4 x ptr> [[TMP2]], ptr [[TMP6]], align 8 ; CHECK-NEXT: store <4 x ptr> [[TMP3]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll index 5177d7b6e0090..1773b2aab7d87 100644 --- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll +++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll @@ -43,12 +43,13 @@ define void @deref_assumption_in_header_constant_trip_count(ptr noalias noundef ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP30]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP32]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop.header @@ -102,8 +103,9 @@ define void @align_deref_assumption_in_header_constant_trip_count_loop_invariant ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4) ] @@ -172,12 +174,13 @@ define void @deref_assumption_too_small_in_header_constant_trip_count(ptr noalia ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP30]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP32]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop.header @@ -247,12 +250,13 @@ define void @deref_assumption_in_header_constant_trip_count_align_1(ptr noalias ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP18]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop.header @@ -322,12 +326,13 @@ define void @deref_assumption_in_header_constant_trip_count_align_via_arg_attrib ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP18]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop.header @@ -397,12 +402,13 @@ define void @deref_assumption_in_header_constant_trip_count_align_not_known(ptr ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP18]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop.header @@ -468,12 +474,13 @@ define void @deref_assumption_in_then_constant_trip_count(ptr noalias noundef %a ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP26]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop.header @@ -543,12 +550,13 @@ define void @deref_assumption_in_latch_constant_trip_count(ptr noalias noundef % ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP30]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP32]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop.header @@ -621,7 +629,7 @@ define void @deref_assumption_in_header_variable_trip_count(ptr noalias noundef ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP30]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP32]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -679,8 +687,9 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_1(ptr noali ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 4000) ] @@ -750,8 +759,9 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_1 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 3999) ] @@ -803,8 +813,9 @@ define void @align_and_deref_assumption_in_preheader_constant_trip_count_align_4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4000) ] @@ -857,8 +868,9 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_known_via ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 4000) ] @@ -928,8 +940,9 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 4000) ] @@ -999,8 +1012,9 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_4 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 3999) ] @@ -1068,8 +1082,9 @@ define void @may_free_align_deref_assumption_in_header_constant_trip_count_loop_ ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4) ] @@ -1138,8 +1153,9 @@ define void @may_free_local_ptr_align_deref_assumption_in_header_constant_trip_c ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: %a = call ptr @get_ptr() @@ -1195,8 +1211,9 @@ define void @deref_assumption_in_header_constant_trip_count_nofree_via_context(p ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4000) ] @@ -1267,8 +1284,9 @@ define void @deref_assumption_in_header_constant_trip_count_may_free(ptr noalias ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4000) ] @@ -1339,8 +1357,9 @@ define void @deref_assumption_in_header_constant_trip_count_nofree_via_context_b ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4000) ] @@ -1418,8 +1437,9 @@ define void @deref_assumption_in_header_constant_trip_count_multiple_loop_predec ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4000) ] diff --git a/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll index 5e3a70222d7bb..6c9e3694b2a34 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll @@ -23,7 +23,7 @@ define i64 @select_icmp_const(ptr %a, i64 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 3) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -46,18 +46,18 @@ define i64 @select_icmp_const(ptr %a, i64 %n) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT8]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT9]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i64> [[DOTSPLAT9]], ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[TMP7:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ [[DOTSPLAT]], %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD8]], splat (i64 3) ; CHECK-NEXT: [[TMP11]] = select <4 x i1> [[TMP10]], <4 x i64> [[VEC_IND5]], <4 x i64> [[VEC_PHI7]] ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[TMP7]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT6]] = add <4 x i64> [[VEC_IND5]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT11]] = add nuw nsw <4 x i64> [[VEC_IND5]], splat (i64 4) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP12]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: @@ -124,7 +124,7 @@ define i64 @select_fcmp_const_fast(ptr %a, i64 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -147,18 +147,18 @@ define i64 @select_fcmp_const_fast(ptr %a, i64 %n) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT8]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT9]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i64> [[DOTSPLAT9]], ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[TMP7:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ [[DOTSPLAT]], %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]] ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[TMP10:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD8]], splat (float 3.000000e+00) ; CHECK-NEXT: [[TMP11]] = select <4 x i1> [[TMP10]], <4 x i64> [[VEC_IND5]], <4 x i64> [[VEC_PHI7]] ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[TMP7]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT6]] = add <4 x i64> [[VEC_IND5]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT11]] = add nuw nsw <4 x i64> [[VEC_IND5]], splat (i64 4) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP12]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: @@ -231,7 +231,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], splat (i8 3) ; CHECK-NEXT: [[TMP8]] = select <4 x i1> [[TMP7]], <4 x i8> [[VEC_IND]], <4 x i8> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i8> [[VEC_IND]], splat (i8 4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -257,11 +257,11 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT8]], <4 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i8> [[DOTSPLAT9]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i8> [[DOTSPLAT9]], ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX4:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i8> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i8> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i8> [ [[DOTSPLAT]], %[[VEC_EPILOG_PH]] ], [ [[TMP17:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX4]] to i8 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[OFFSET_IDX]] @@ -269,7 +269,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD10]], splat (i8 3) ; CHECK-NEXT: [[TMP17]] = select <4 x i1> [[TMP16]], <4 x i8> [[VEC_IND5]], <4 x i8> [[VEC_PHI7]] ; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i32 [[INDEX4]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT6]] = add <4 x i8> [[VEC_IND5]], splat (i8 4) +; CHECK-NEXT: [[VEC_IND_NEXT11]] = add nuw nsw <4 x i8> [[VEC_IND5]], splat (i8 4) ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT11]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP18]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll index 15daf90ad770c..813c040bd78f7 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll @@ -495,7 +495,7 @@ define i64 @test_reduction_with_widen_induction_order_1(ptr %A, i64 %N) { ; CHECK-NEXT: [[TMP1]] = add <4 x i64> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-NEXT: store <4 x i64> [[VEC_IND]], ptr [[TMP0]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: @@ -513,7 +513,7 @@ define i64 @test_reduction_with_widen_induction_order_1(ptr %A, i64 %N) { ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] @@ -524,7 +524,7 @@ define i64 @test_reduction_with_widen_induction_order_1(ptr %A, i64 %N) { ; CHECK-NEXT: [[TMP6]] = add <4 x i64> [[VEC_PHI6]], [[WIDE_LOAD7]] ; CHECK-NEXT: store <4 x i64> [[VEC_IND5]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX4]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT9]] = add <4 x i64> [[VEC_IND5]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT9]] = add nuw nsw <4 x i64> [[VEC_IND5]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP7]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: vec.epilog.middle.block: @@ -589,7 +589,7 @@ define i64 @test_reduction_with_widen_induction_order_2(ptr %A, i64 %N) { ; CHECK-NEXT: [[TMP1]] = add <4 x i64> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-NEXT: store <4 x i64> [[VEC_IND]], ptr [[TMP0]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: middle.block: @@ -607,7 +607,7 @@ define i64 @test_reduction_with_widen_induction_order_2(ptr %A, i64 %N) { ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] @@ -618,7 +618,7 @@ define i64 @test_reduction_with_widen_induction_order_2(ptr %A, i64 %N) { ; CHECK-NEXT: [[TMP6]] = add <4 x i64> [[VEC_PHI5]], [[WIDE_LOAD7]] ; CHECK-NEXT: store <4 x i64> [[VEC_IND6]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX4]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT9]] = add <4 x i64> [[VEC_IND6]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT9]] = add nuw nsw <4 x i64> [[VEC_IND6]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP7]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: vec.epilog.middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll index d3c8c1304b588..2f05435bc75ba 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll @@ -689,6 +689,17 @@ define double @test_chained_first_order_recurrence_sink_users_1(ptr %ptr, i64 %n ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ , %[[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x double> [ , %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = add i64 1, [[INDEX1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[PTR]], i64 [[OFFSET_IDX1]] +; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x double>, ptr [[TMP3]], align 8 +; CHECK-NEXT: [[TMP2]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[WIDE_LOAD]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> splat (double 1.000000e+01), [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x double> [[TMP5]], [[TMP2]] +; CHECK-NEXT: store <4 x double> [[TMP8]], ptr [[TMP3]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[INDEX]] +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x double> [[TMP2]], i32 3 @@ -801,7 +812,7 @@ define i64 @test_first_order_recurrences_and_induction(ptr %ptr, i64 %n) { ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP1]], splat (i64 10) ; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -865,7 +876,7 @@ define i64 @test_first_order_recurrences_and_induction2(ptr %ptr, i64 %n) { ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP1]], splat (i64 10) ; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index bd0c098d335a2..cebd52fa7f866 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -875,14 +875,14 @@ define i32 @PR27246() { ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I_016]], i64 0 ; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], +; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add nsw <4 x i32> [[DOTSPLAT]], ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 -4) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[STEP_ADD]], splat (i32 -4) ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -965,13 +965,13 @@ define i32 @PR27246() { ; SINK-AFTER-NEXT: [[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]] ; SINK-AFTER-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I_016]], i64 0 ; SINK-AFTER-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; SINK-AFTER-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], +; SINK-AFTER-NEXT: [[INDUCTION:%.*]] = add nsw <4 x i32> [[DOTSPLAT]], ; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]] ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) +; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[VEC_IND]], splat (i32 -4) ; SINK-AFTER-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SINK-AFTER: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/induction-step.ll b/llvm/test/Transforms/LoopVectorize/induction-step.ll index 53d5ac472c892..1c7ec9b28bd71 100644 --- a/llvm/test/Transforms/LoopVectorize/induction-step.ll +++ b/llvm/test/Transforms/LoopVectorize/induction-step.ll @@ -37,19 +37,19 @@ define void @induction_with_global(i32 %init, ptr noalias nocapture %A, i32 %N) ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = mul <8 x i32> , [[DOTSPLAT3]] -; CHECK-NEXT: [[INDUCTION4:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP0]], 8 +; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <8 x i32> , [[DOTSPLAT3]] +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <8 x i32> [[DOTSPLAT]], [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = mul nsw i32 [[TMP0]], 8 ; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP7]], i64 0 ; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[TMP8:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ [[INDUCTION4]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]] ; CHECK-NEXT: store <8 x i32> [[VEC_IND]], ptr [[TMP9]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP8]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], [[DOTSPLAT6]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <8 x i32> [[VEC_IND]], [[DOTSPLAT6]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -126,19 +126,19 @@ define i32 @induction_with_loop_inv(i32 %init, ptr noalias nocapture %A, i32 %N, ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[J_012]], i64 0 ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> , [[DOTSPLAT3]] -; CHECK-NEXT: [[INDUCTION4:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[J_012]], 8 +; CHECK-NEXT: [[TMP8:%.*]] = mul nsw <8 x i32> , [[DOTSPLAT3]] +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <8 x i32> [[DOTSPLAT]], [[TMP8]] +; CHECK-NEXT: [[TMP5:%.*]] = mul nsw i32 [[J_012]], 8 ; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[TMP6:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ [[INDUCTION4]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]] ; CHECK-NEXT: store <8 x i32> [[VEC_IND]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP6]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], [[DOTSPLAT6]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <8 x i32> [[VEC_IND]], [[DOTSPLAT6]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -219,19 +219,19 @@ define void @non_primary_iv_loop_inv_trunc(ptr %a, i64 %n, i64 %step) { ; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[STEP]] to i32 ; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP3]], i64 0 ; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> , [[DOTSPLAT6]] -; CHECK-NEXT: [[INDUCTION7:%.*]] = add <8 x i32> zeroinitializer, [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP3]], 8 +; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw <8 x i32> , [[DOTSPLAT6]] +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <8 x i32> zeroinitializer, [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw nsw i32 [[TMP3]], 8 ; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT8]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[TMP6:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND10:%.*]] = phi <8 x i32> [ [[INDUCTION7]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND10:%.*]] = phi <8 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]] ; CHECK-NEXT: store <8 x i32> [[VEC_IND10]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP6]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT11]] = add <8 x i32> [[VEC_IND10]], [[DOTSPLAT9]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i32> [[VEC_IND10]], [[DOTSPLAT9]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index 66e4de5da7955..76fa6bdb543a7 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -29,7 +29,7 @@ define void @multi_int_induction(ptr %A, i32 %N) { ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -70,7 +70,7 @@ define void @multi_int_induction(ptr %A, i32 %N) { ; IND-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP3]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; IND-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IND: middle.block: @@ -159,7 +159,7 @@ define void @multi_int_induction(ptr %A, i32 %N) { ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP4]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP6]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -1578,7 +1578,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP18]], align 1, !alias.scope [[META20:![0-9]+]], !noalias [[META17]] ; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP19]], align 1, !alias.scope [[META20]], !noalias [[META17]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: @@ -1631,18 +1631,18 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; IND-NEXT: [[TMP10:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; IND-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i64 0 ; IND-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP10]], i64 1 -; IND-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] -; IND-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]] -; IND-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 1, !alias.scope [[META17:![0-9]+]] -; IND-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 1, !alias.scope [[META17]] +; IND-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP11]] +; IND-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP13]] +; IND-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP12]], align 1, !alias.scope [[META17:![0-9]+]] +; IND-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP24]], align 1, !alias.scope [[META17]] ; IND-NEXT: [[DOTSPLIT:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P]], i64 [[INDEX]] ; IND-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTSPLIT]], i64 4 ; IND-NEXT: [[TMP17:%.*]] = getelementptr [[PAIR_I32]], ptr [[P]], i64 [[INDEX]] ; IND-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP17]], i64 12 -; IND-NEXT: store i32 [[TMP25]], ptr [[TMP16]], align 1, !alias.scope [[META20:![0-9]+]], !noalias [[META17]] +; IND-NEXT: store i32 [[TMP14]], ptr [[TMP16]], align 1, !alias.scope [[META20:![0-9]+]], !noalias [[META17]] ; IND-NEXT: store i32 [[TMP15]], ptr [[TMP18]], align 1, !alias.scope [[META20]], !noalias [[META17]] ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; IND-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; IND-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; IND: middle.block: @@ -1801,7 +1801,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; UNROLL-NO-IC-NEXT: store i32 [[TMP25]], ptr [[TMP29]], align 1, !alias.scope [[META20]], !noalias [[META17]] ; UNROLL-NO-IC-NEXT: store i32 [[TMP26]], ptr [[TMP30]], align 1, !alias.scope [[META20]], !noalias [[META17]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[STEP_ADD]], splat (i64 2) ; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -2439,7 +2439,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; CHECK-NEXT: store i16 [[TMP9]], ptr [[TMP7]], align 2 ; CHECK-NEXT: store i16 [[TMP10]], ptr [[TMP8]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: @@ -2488,7 +2488,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; IND-NEXT: store i16 [[TMP8]], ptr [[TMP6]], align 2 ; IND-NEXT: store i16 [[TMP9]], ptr [[TMP7]], align 2 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; IND-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; IND: middle.block: @@ -2611,7 +2611,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; UNROLL-NO-IC-NEXT: store i16 [[TMP17]], ptr [[TMP13]], align 2 ; UNROLL-NO-IC-NEXT: store i16 [[TMP18]], ptr [[TMP14]], align 2 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -4048,7 +4048,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; CHECK: middle.block: @@ -4081,7 +4081,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; IND-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; IND: middle.block: @@ -4155,7 +4155,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -4249,7 +4249,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP5]] ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; CHECK: middle.block: @@ -4287,7 +4287,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]] ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; IND-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; IND: middle.block: @@ -4377,7 +4377,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP7]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP9]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -4470,7 +4470,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[I]], [[N_VEC]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[I]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -4479,7 +4479,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[OFFSET_IDX]] ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] ; CHECK: middle.block: @@ -4508,7 +4508,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; IND-NEXT: [[IND_END:%.*]] = add i32 [[I]], [[N_VEC]] ; IND-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[I]], i64 0 ; IND-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; IND-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], +; IND-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], ; IND-NEXT: br label [[VECTOR_BODY:%.*]] ; IND: vector.body: ; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -4518,7 +4518,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; IND-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP1]] ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; IND-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] ; IND: middle.block: @@ -4548,7 +4548,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; UNROLL-NEXT: [[IND_END:%.*]] = add i32 [[I]], [[N_VEC]] ; UNROLL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[I]], i64 0 ; UNROLL-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; UNROLL-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], +; UNROLL-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -4592,7 +4592,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = add i32 [[I]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[I]], i64 0 ; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], +; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -4604,7 +4604,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP4]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -4633,7 +4633,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; INTERLEAVE-NEXT: [[IND_END:%.*]] = add i32 [[I]], [[N_VEC]] ; INTERLEAVE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I]], i64 0 ; INTERLEAVE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; INTERLEAVE-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], +; INTERLEAVE-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i32> [[DOTSPLAT]], ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -4698,7 +4698,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; CHECK: middle.block: @@ -4736,7 +4736,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; IND-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP0]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) +; IND-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 4) ; IND-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; IND: middle.block: @@ -4819,7 +4819,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 4) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[STEP_ADD]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -4943,7 +4943,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; CHECK-NEXT: [[TMP14:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32> ; CHECK-NEXT: [[TMP15]] = or <2 x i32> [[VEC_PHI]], [[TMP14]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i16> [[VEC_IND]], splat (i16 2) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; CHECK: middle.block: @@ -4986,7 +4986,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; IND-NEXT: [[TMP13:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32> ; IND-NEXT: [[TMP14]] = or <2 x i32> [[VEC_PHI]], [[TMP13]] ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) +; IND-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i16> [[VEC_IND]], splat (i16 2) ; IND-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 ; IND-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; IND: middle.block: @@ -5121,7 +5121,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; UNROLL-NO-IC-NEXT: [[TMP28]] = or <2 x i32> [[VEC_PHI]], [[TMP26]] ; UNROLL-NO-IC-NEXT: [[TMP29]] = or <2 x i32> [[VEC_PHI1]], [[TMP27]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[STEP_ADD]], splat (i16 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i16> [[STEP_ADD]], splat (i16 2) ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 ; UNROLL-NO-IC-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -5309,9 +5309,9 @@ define i64 @trunc_with_first_order_recurrence() { ; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64> ; CHECK-NEXT: [[TMP10]] = add <2 x i64> [[TMP6]], [[TMP9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) -; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], splat (i32 2) -; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT3]] = add nuw nsw <2 x i32> [[VEC_IND2]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT5]] = add nuw nsw <2 x i32> [[VEC_IND4]], splat (i32 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] ; CHECK: middle.block: @@ -5360,18 +5360,18 @@ define i64 @trunc_with_first_order_recurrence() { ; IND-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND2]], <2 x i32> ; IND-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[VEC_IND]], [[VEC_IND2]] ; IND-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], splat (i32 42) -; IND-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND2]] +; IND-NEXT: [[TMP3:%.*]] = add nuw <2 x i32> [[TMP0]], [[VEC_IND2]] ; IND-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP3]], [[TMP2]] ; IND-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64> ; IND-NEXT: [[TMP6:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP5]] -; IND-NEXT: [[TMP7:%.*]] = shl <2 x i32> [[VEC_IND4]], splat (i32 1) +; IND-NEXT: [[TMP7:%.*]] = shl nuw <2 x i32> [[VEC_IND4]], splat (i32 1) ; IND-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP2]], [[TMP7]] ; IND-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64> ; IND-NEXT: [[TMP10]] = add <2 x i64> [[TMP6]], [[TMP9]] ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) -; IND-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], splat (i32 2) -; IND-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], splat (i32 2) +; IND-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) +; IND-NEXT: [[VEC_IND_NEXT3]] = add nuw nsw <2 x i32> [[VEC_IND2]], splat (i32 2) +; IND-NEXT: [[VEC_IND_NEXT5]] = add nuw nsw <2 x i32> [[VEC_IND4]], splat (i32 2) ; IND-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 ; IND-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] ; IND: middle.block: @@ -5518,9 +5518,9 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NO-IC-NEXT: [[TMP20]] = add <2 x i64> [[TMP12]], [[TMP18]] ; UNROLL-NO-IC-NEXT: [[TMP21]] = add <2 x i64> [[TMP13]], [[TMP19]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT4]] = add <2 x i32> [[STEP_ADD7]], splat (i32 2) -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT6]] = add <2 x i32> [[STEP_ADD8]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[STEP_ADD]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT4]] = add nuw nsw <2 x i32> [[STEP_ADD7]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT6]] = add nuw nsw <2 x i32> [[STEP_ADD8]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 ; UNROLL-NO-IC-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -5683,7 +5683,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[VEC_IND]], [[TMP4]] ; CHECK-NEXT: store <2 x i32> [[TMP6]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]] ; CHECK: middle.block: @@ -5711,7 +5711,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; IND-NEXT: [[TMP5:%.*]] = add <2 x i32> [[VEC_IND]], [[TMP2]] ; IND-NEXT: store <2 x i32> [[TMP5]], ptr [[TMP4]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; IND-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; IND-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]] ; IND: middle.block: @@ -5778,7 +5778,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP8]], ptr [[TMP7]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP9]], ptr [[TMP11]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; UNROLL-NO-IC-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -5900,9 +5900,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = mul <2 x i32> , [[DOTSPLAT]] -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> zeroinitializer, [[TMP17]] -; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[STEP]], 2 +; CHECK-NEXT: [[TMP19:%.*]] = mul nsw <2 x i32> , [[DOTSPLAT]] +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <2 x i32> zeroinitializer, [[TMP19]] +; CHECK-NEXT: [[TMP18:%.*]] = mul nsw i32 [[STEP]], 2 ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i64 0 ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -5914,7 +5914,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[TMP20]], ptr [[TMP21]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], [[DOTSPLAT3]] ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] ; CHECK: middle.block: @@ -5972,8 +5972,8 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; IND-NEXT: [[IND_END:%.*]] = mul i32 [[STEP]], [[DOTCAST]] ; IND-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0 ; IND-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; IND-NEXT: [[TMP15:%.*]] = mul nuw <2 x i32> [[DOTSPLAT]], -; IND-NEXT: [[TMP16:%.*]] = shl i32 [[STEP]], 1 +; IND-NEXT: [[TMP15:%.*]] = mul nuw nsw <2 x i32> [[DOTSPLAT]], +; IND-NEXT: [[TMP16:%.*]] = shl nsw i32 [[STEP]], 1 ; IND-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP16]], i64 0 ; IND-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer ; IND-NEXT: br label [[VECTOR_BODY:%.*]] @@ -5985,7 +5985,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; IND-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[INDEX]] ; IND-NEXT: store <2 x i32> [[TMP17]], ptr [[TMP18]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]] +; IND-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], [[DOTSPLAT3]] ; IND-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] ; IND: middle.block: @@ -6044,7 +6044,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 ; UNROLL-NEXT: [[IND_END:%.*]] = mul i32 [[STEP]], [[DOTCAST]] ; UNROLL-NEXT: [[TMP16:%.*]] = shl <2 x i32> [[DOTSPLAT]], splat (i32 1) -; UNROLL-NEXT: [[TMP17:%.*]] = mul nuw <2 x i32> [[DOTSPLAT]], +; UNROLL-NEXT: [[TMP17:%.*]] = mul nuw nsw <2 x i32> [[DOTSPLAT]], ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -6058,7 +6058,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NEXT: store <2 x i32> [[TMP18]], ptr [[TMP20]], align 4 ; UNROLL-NEXT: store <2 x i32> [[TMP19]], ptr [[TMP21]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], [[TMP16]] +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[STEP_ADD]], [[TMP16]] ; UNROLL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] ; UNROLL: middle.block: @@ -6120,8 +6120,8 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NO-IC-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]] ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = mul <2 x i32> splat (i32 2), [[BROADCAST_SPLAT]] -; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = mul <2 x i32> , [[BROADCAST_SPLAT]] -; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <2 x i32> zeroinitializer, [[TMP18]] +; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = mul nsw <2 x i32> , [[BROADCAST_SPLAT]] +; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add nsw <2 x i32> zeroinitializer, [[TMP19]] ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -6135,7 +6135,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP20]], ptr [[TMP22]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP21]], ptr [[TMP24]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], [[TMP17]] +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[STEP_ADD]], [[TMP17]] ; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -6194,7 +6194,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 ; INTERLEAVE-NEXT: [[IND_END:%.*]] = mul i32 [[STEP]], [[DOTCAST]] ; INTERLEAVE-NEXT: [[TMP16:%.*]] = shl <4 x i32> [[DOTSPLAT]], splat (i32 2) -; INTERLEAVE-NEXT: [[TMP17:%.*]] = mul <4 x i32> [[DOTSPLAT]], +; INTERLEAVE-NEXT: [[TMP17:%.*]] = mul nsw <4 x i32> [[DOTSPLAT]], ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -6208,7 +6208,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; INTERLEAVE-NEXT: store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4 ; INTERLEAVE-NEXT: store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], [[TMP16]] +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[STEP_ADD]], [[TMP16]] ; INTERLEAVE-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] ; INTERLEAVE: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll index 8975c058c6b79..da8efa77bf5e6 100644 --- a/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll @@ -105,7 +105,7 @@ define i32 @cond_branch(i32 %a, ptr %src) { ; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP3]], <4 x i32> splat (i32 10) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -152,7 +152,7 @@ define i32 @optimizable_trunc_used_outside() { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-gep-nowrap-flags.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-gep-nowrap-flags.ll index d6a6fded712a5..7224e26d6a0e6 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-gep-nowrap-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-gep-nowrap-flags.ll @@ -33,12 +33,13 @@ define void @nusw_preservation(ptr noalias %A, ptr %B) { ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[VEC_IND]], splat (i32 -4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -94,12 +95,13 @@ define void @inbounds_preservation(ptr noalias %A, ptr %B) { ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[VEC_IND]], splat (i32 -4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -155,12 +157,13 @@ define void @nuw_drop(ptr noalias %A, ptr %B) { ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[VEC_IND]], splat (i32 -4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -209,8 +212,9 @@ define void @nusw_preservation_2(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -258,8 +262,9 @@ define void @inbounds_preservation_2(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -307,8 +312,9 @@ define void @nuw_drop_2(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll index fc2e2337e0569..de079374eb401 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -345,7 +345,7 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[REVERSE3]], <4 x i32> [[REVERSE4]], <8 x i32> ; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[VEC_IND]], splat (i32 -4) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -595,7 +595,7 @@ define void @load_gap_reverse(ptr noalias nocapture %P1, ptr noalias nocapture % ; CHECK-NEXT: store i64 [[TMP27]], ptr [[TMP10]], align 8 ; CHECK-NEXT: store i64 [[TMP28]], ptr [[TMP11]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 -4) ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: @@ -721,7 +721,7 @@ define void @mixed_load3_store3(ptr nocapture %A) { ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <12 x i32> ; CHECK-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: middle.block: @@ -1157,13 +1157,13 @@ define i32 @PR27626_3(ptr %p, i64 %n, i32 %z) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[DOTSPLIT:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTSPLIT]], i64 4 -; CHECK-NEXT: [[DOTSPLIT3:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP5]] +; CHECK-NEXT: [[DOTSPLIT3:%.*]] = getelementptr inbounds nuw [[PAIR_I32]], ptr [[P]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTSPLIT3]], i64 4 -; CHECK-NEXT: [[DOTSPLIT4:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP7]] +; CHECK-NEXT: [[DOTSPLIT4:%.*]] = getelementptr inbounds nuw [[PAIR_I32]], ptr [[P]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTSPLIT4]], i64 4 -; CHECK-NEXT: [[DOTSPLIT5:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP9]] +; CHECK-NEXT: [[DOTSPLIT5:%.*]] = getelementptr inbounds nuw [[PAIR_I32]], ptr [[P]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTSPLIT5]], i64 4 -; CHECK-NEXT: [[DOTSPLIT6:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP11]] +; CHECK-NEXT: [[DOTSPLIT6:%.*]] = getelementptr inbounds nuw [[PAIR_I32]], ptr [[P]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTSPLIT6]], i64 4 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i64 0 @@ -1178,7 +1178,7 @@ define i32 @PR27626_3(ptr %p, i64 %n, i32 %z) { ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP17]] = add <4 x i32> [[STRIDED_VEC2]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: middle.block: @@ -1346,12 +1346,12 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -1) +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 -1) ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP7]], i64 0 ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i64> [[TMP7]], i64 1 ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i64> [[TMP7]], i64 2 ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP7]], i64 3 -; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -3) +; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 -3) ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP8]], i64 0 ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP8]], i64 1 ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP8]], i64 2 @@ -1385,7 +1385,7 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) { ; CHECK-NEXT: store i32 [[Z]], ptr [[TMP11]], align 4 ; CHECK-NEXT: store i32 [[Z]], ptr [[TMP13]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll index 70b1ea13677b8..69d2aa4c620c1 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll @@ -23,7 +23,7 @@ define i64 @select_decreasing_induction_icmp_const_start(ptr %a) { ; IC1VF4-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3) ; IC1VF4-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4) +; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 -4) ; IC1VF4-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000 ; IC1VF4-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IC1VF4: [[MIDDLE_BLOCK]]: @@ -77,7 +77,7 @@ define i64 @select_decreasing_induction_icmp_const_start(ptr %a) { ; IC4VF4-NEXT: [[TMP15]] = select <4 x i1> [[TMP11]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]] ; IC4VF4-NEXT: [[TMP16]] = select <4 x i1> [[TMP12]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]] ; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 -4) +; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[STEP_ADD_3]], splat (i64 -4) ; IC4VF4-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000 ; IC4VF4-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IC4VF4: [[MIDDLE_BLOCK]]: @@ -180,7 +180,7 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) { ; IC1VF4-NEXT: [[TMP4:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1) ; IC1VF4-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[VEC_PHI]] ; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 -4) +; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -4) ; IC1VF4-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12 ; IC1VF4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IC1VF4: [[MIDDLE_BLOCK]]: @@ -398,7 +398,7 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) { ; IC4VF4-NEXT: [[TMP114:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP110]], <4 x i16> [[VEC_PHI2]] ; IC4VF4-NEXT: [[TMP115:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP111]], <4 x i16> [[VEC_PHI3]] ; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 -; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD_3]], splat (i16 -4) +; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i16> [[STEP_ADD_3]], splat (i16 -4) ; IC4VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IC4VF4: [[MIDDLE_BLOCK]]: ; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP112]], <4 x i16> [[TMP113]]) @@ -506,7 +506,7 @@ define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) { ; IC1VF4-NEXT: [[TMP4:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1) ; IC1VF4-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[VEC_PHI]] ; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 -4) +; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -4) ; IC1VF4-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12 ; IC1VF4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IC1VF4: [[MIDDLE_BLOCK]]: @@ -724,7 +724,7 @@ define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) { ; IC4VF4-NEXT: [[TMP114:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP110]], <4 x i16> [[VEC_PHI2]] ; IC4VF4-NEXT: [[TMP115:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP111]], <4 x i16> [[VEC_PHI3]] ; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 -; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD_3]], splat (i16 -4) +; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i16> [[STEP_ADD_3]], splat (i16 -4) ; IC4VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IC4VF4: [[MIDDLE_BLOCK]]: ; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP112]], <4 x i16> [[TMP113]]) @@ -829,7 +829,7 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) { ; IC1VF4-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3) ; IC1VF4-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4) +; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 -4) ; IC1VF4-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], -9223372036854775808 ; IC1VF4-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IC1VF4: [[MIDDLE_BLOCK]]: @@ -883,7 +883,7 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) { ; IC4VF4-NEXT: [[TMP15]] = select <4 x i1> [[TMP11]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]] ; IC4VF4-NEXT: [[TMP16]] = select <4 x i1> [[TMP12]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]] ; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 -4) +; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[STEP_ADD_3]], splat (i64 -4) ; IC4VF4-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], -9223372036854775808 ; IC4VF4-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IC4VF4: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll index c958ea7b9b88e..21ef1885b75b9 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll @@ -22,7 +22,7 @@ define i64 @select_icmp_nuw_nsw(ptr %a, ptr %b, i64 %ii, i64 %n) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP6]] = select <4 x i1> [[TMP5]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP9]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -92,7 +92,7 @@ define i64 @select_icmp_nsw(ptr %a, ptr %b, i64 %ii, i64 %n) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP6]] = select <4 x i1> [[TMP5]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP9]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll index b991d58eb2b8d..c55b089043e25 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll @@ -30,7 +30,7 @@ define i32 @select_icmp_const_truncated_iv_widened_exit(ptr %a, i32 %n) { ; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], splat (i64 3) ; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -101,7 +101,7 @@ define i32 @select_icmp_const_truncated_iv_widened_exit(ptr %a, i32 %n) { ; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]] ; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[STEP_ADD_3]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: @@ -253,7 +253,7 @@ define i32 @select_icmp_const_truncated_iv_const_exit(ptr %a) { ; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], splat (i64 3) ; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000 ; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -297,7 +297,7 @@ define i32 @select_icmp_const_truncated_iv_const_exit(ptr %a) { ; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]] ; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[STEP_ADD_3]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000 ; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: @@ -396,7 +396,7 @@ define i32 @select_fcmp_max_valid_const_ub(ptr %a) { ; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD]], zeroinitializer ; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648 ; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -440,7 +440,7 @@ define i32 @select_fcmp_max_valid_const_ub(ptr %a) { ; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]] ; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[STEP_ADD_3]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648 ; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: @@ -544,7 +544,7 @@ define i32 @select_icmp_truncated_unsigned_iv_range(ptr %a) { ; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], splat (i32 3) ; CHECK-VF4IC1-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648 ; CHECK-VF4IC1-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -589,7 +589,7 @@ define i32 @select_icmp_truncated_unsigned_iv_range(ptr %a) { ; CHECK-VF4IC4-NEXT: [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]] ; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[STEP_ADD_3]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648 ; CHECK-VF4IC4-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll index 91c83103bf8f2..8d3bd267b9482 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll @@ -22,7 +22,7 @@ define i64 @select_icmp_const_1(ptr %a, i64 %n) { ; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 3) ; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -85,7 +85,7 @@ define i64 @select_icmp_const_1(ptr %a, i64 %n) { ; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]] ; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: @@ -215,7 +215,7 @@ define i64 @select_icmp_const_2(ptr %a, i64 %n) { ; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 3) ; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_PHI]], <4 x i64> [[VEC_IND]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -278,7 +278,7 @@ define i64 @select_icmp_const_2(ptr %a, i64 %n) { ; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i64> [[VEC_PHI2]], <4 x i64> [[STEP_ADD_2]] ; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i64> [[VEC_PHI3]], <4 x i64> [[STEP_ADD_3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: @@ -408,7 +408,7 @@ define i64 @select_icmp_const_3_variable_rdx_start(ptr %a, i64 %rdx.start, i64 % ; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 3) ; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -471,7 +471,7 @@ define i64 @select_icmp_const_3_variable_rdx_start(ptr %a, i64 %rdx.start, i64 % ; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]] ; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: @@ -601,7 +601,7 @@ define i64 @select_fcmp_const_fast(ptr %a, i64 %n) { ; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -664,7 +664,7 @@ define i64 @select_fcmp_const_fast(ptr %a, i64 %n) { ; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]] ; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: @@ -794,7 +794,7 @@ define i64 @select_fcmp_const(ptr %a, i64 %n) { ; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = fcmp ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -857,7 +857,7 @@ define i64 @select_fcmp_const(ptr %a, i64 %n) { ; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]] ; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: @@ -989,7 +989,7 @@ define i64 @select_icmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) { ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-VF4IC1-NEXT: [[TMP6]] = select <4 x i1> [[TMP5]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC1-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -1062,7 +1062,7 @@ define i64 @select_icmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) { ; CHECK-VF4IC4-NEXT: [[TMP17]] = select <4 x i1> [[TMP13]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]] ; CHECK-VF4IC4-NEXT: [[TMP18]] = select <4 x i1> [[TMP14]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC4-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: @@ -1208,7 +1208,7 @@ define i64 @select_fcmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) { ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-VF4IC1-NEXT: [[TMP6]] = select <4 x i1> [[TMP5]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC1-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -1281,7 +1281,7 @@ define i64 @select_fcmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) { ; CHECK-VF4IC4-NEXT: [[TMP17]] = select <4 x i1> [[TMP13]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]] ; CHECK-VF4IC4-NEXT: [[TMP18]] = select <4 x i1> [[TMP14]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC4-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: @@ -1428,7 +1428,7 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 % ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-VF4IC1-NEXT: [[TMP6]] = select <4 x i1> [[TMP5]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC1-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -1505,7 +1505,7 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 % ; CHECK-VF4IC4-NEXT: [[TMP17]] = select <4 x i1> [[TMP13]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[TMP18]] = select <4 x i1> [[TMP14]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI4]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC4-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: @@ -1661,7 +1661,7 @@ define i64 @select_icmp_unsigned_iv_range(ptr %a, ptr %b, i64 %rdx.start) { ; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-VF4IC1-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9223372036854775804 ; CHECK-VF4IC1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -1730,7 +1730,7 @@ define i64 @select_icmp_unsigned_iv_range(ptr %a, ptr %b, i64 %rdx.start) { ; CHECK-VF4IC4-NEXT: [[TMP16]] = select <4 x i1> [[TMP12]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]] ; CHECK-VF4IC4-NEXT: [[TMP17]] = select <4 x i1> [[TMP13]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9223372036854775792 ; CHECK-VF4IC4-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index 86515ebe25637..162803a377bc0 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -388,7 +388,7 @@ define i64 @iv_scalar_steps_and_outside_users(ptr %ptr) { ; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[INDEX]] ; VEC-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP1]], align 4 ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; VEC-NEXT: [[VEC_IND_NEXT]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) ; VEC-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1002 ; VEC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; VEC: [[MIDDLE_BLOCK]]: @@ -448,7 +448,7 @@ define i32 @iv_2_dead_in_loop_only_used_outside(ptr %ptr) { ; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[INDEX]] ; VEC-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP1]], align 4 ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; VEC-NEXT: [[VEC_IND_NEXT]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) ; VEC-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1002 ; VEC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; VEC: [[MIDDLE_BLOCK]]: @@ -597,7 +597,7 @@ define i32 @postinc_not_iv_backedge_value(i32 %k) { ; VEC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VEC-NEXT: [[TMP0:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; VEC-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; VEC-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; VEC-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; VEC: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll index 00256a5c4a456..4b3f370f4cdaf 100644 --- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll @@ -418,7 +418,7 @@ define void @test_rev_loops_non_deref_loads(ptr nocapture noundef writeonly %des ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.continue2: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 -2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 -2) ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: @@ -577,7 +577,7 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.continue2: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 -2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 -2) ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: @@ -684,7 +684,7 @@ define void @adding_offset_overflows(i32 %n, ptr %A) { ; CHECK: pred.store.continue4: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -707,7 +707,7 @@ define void @adding_offset_overflows(i32 %n, ptr %A) { ; CHECK: loop.latch: ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: diff --git a/llvm/test/Transforms/LoopVectorize/loop-scalars.ll b/llvm/test/Transforms/LoopVectorize/loop-scalars.ll index f8ddd344f5587..a598f154ef54b 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-scalars.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-scalars.ll @@ -21,7 +21,7 @@ define void @vector_gep(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x ptr> [[TMP0]], ptr [[TMP1]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll index 1e4a98d22bf17..9c73d85bd58ff 100644 --- a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll @@ -23,14 +23,14 @@ define i32 @test1() { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I1:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <2 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -38,18 +38,18 @@ define i32 @test1() { ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] -; CHECK: [[_LR_PH_I]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I1]] +; CHECK: [[_LR_PH_I1]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I1:.*:]] -; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] +; CHECK: [[_LR_PH_I:.*:]] +; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I1]] ] ; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10 ; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]] ; CHECK: [[UNNAMEDBB10]]: @@ -96,14 +96,14 @@ define i32 @test2() { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I1:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <2 x i32> [[DOTSPLAT]], ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -111,18 +111,18 @@ define i32 @test2() { ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> splat (i32 1), <2 x i32> [[VEC_IND]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] -; CHECK: [[_LR_PH_I]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I1]] +; CHECK: [[_LR_PH_I1]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I1:.*:]] -; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] +; CHECK: [[_LR_PH_I:.*:]] +; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I1]] ] ; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10 ; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]] ; CHECK: [[UNNAMEDBB10]]: @@ -169,7 +169,7 @@ define i32 @test3(i32 %N) { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I1:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -178,7 +178,7 @@ define i32 @test3(i32 %N) { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <2 x i32> [[DOTSPLAT]], ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -189,18 +189,18 @@ define i32 @test3(i32 %N) { ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 2) ; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[PREDPHI]], <2 x i32> splat (i32 1) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[PREDPHI1]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I1]] -; CHECK: [[_LR_PH_I1]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] +; CHECK: [[_LR_PH_I]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I:.*:]] -; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I1]] ] +; CHECK: [[_LR_PH_I1:.*:]] +; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] ; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10 ; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]] ; CHECK: [[UNNAMEDBB10]]: @@ -257,14 +257,14 @@ define i32 @test4(i32 %N) { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I1:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <2 x i32> [[DOTSPLAT]], ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -272,18 +272,18 @@ define i32 @test4(i32 %N) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] -; CHECK: [[_LR_PH_I]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT_LOOPEXIT:.*]], label %[[_LR_PH_I1]] +; CHECK: [[_LR_PH_I1]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[DOTLR_PH_I_PREHEADER]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I1:.*:]] -; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] +; CHECK: [[_LR_PH_I:.*:]] +; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I1]] ] ; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10 ; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]] ; CHECK: [[UNNAMEDBB10]]: @@ -519,14 +519,14 @@ define i8 @outside_user_non_phi() { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I1:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <2 x i32> [[DOTSPLAT]], ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -535,18 +535,18 @@ define i8 @outside_user_non_phi() { ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i32> [[PREDPHI]] to <2 x i8> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP4]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] -; CHECK: [[_LR_PH_I]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I1]] +; CHECK: [[_LR_PH_I1]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I1:.*:]] -; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] +; CHECK: [[_LR_PH_I:.*:]] +; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I1]] ] ; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10 ; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]] ; CHECK: [[UNNAMEDBB10]]: @@ -650,14 +650,14 @@ define i32 @sum_arrays_outside_use(ptr %B, ptr %A, ptr %C, i32 %N) { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N]], i32 [[TMP0]]) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I1:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[C1]], [[B2]] ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP2]], 8 ; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[C1]], [[A3]] ; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i32 [[TMP3]], 8 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] -; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[_LR_PH_I]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[_LR_PH_I1]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -680,12 +680,12 @@ define i32 @sum_arrays_outside_use(ptr %B, ptr %A, ptr %C, i32 %N) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP11]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] -; CHECK: [[_LR_PH_I]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I1]] +; CHECK: [[_LR_PH_I1]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ], [ [[B_PROMOTED]], %[[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I1:.*:]] -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IVNEXT:%.*]], %[[DOTLR_PH_I]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] +; CHECK: [[_LR_PH_I:.*:]] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IVNEXT:%.*]], %[[DOTLR_PH_I]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I1]] ] ; CHECK-NEXT: [[INDVARS_IV:%.*]] = sext i32 [[IV]] to i64 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[BLOAD:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 @@ -742,7 +742,7 @@ define i32 @non_uniform_live_out() { ; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i8> [[WIDE_LOAD]], splat (i8 1) ; CHECK-NEXT: store <2 x i8> [[TMP4]], ptr [[TMP2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20000 ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll b/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll index 481fa04cf7164..30d01e8b790a7 100644 --- a/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll +++ b/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll @@ -175,7 +175,7 @@ define void @predicated_noalias_scope_decl(ptr noalias nocapture readonly %a, pt ; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP7]], align 4 ; CHECK-NEXT: store <4 x float> [[TMP6]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll index bfc7feecafbc4..81095290bcc92 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -654,7 +654,7 @@ define void @f4(ptr noalias %A, i32 signext %n) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x i8> [[TMP1]], ptr [[TMP2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -670,16 +670,16 @@ define void @f4(ptr noalias %A, i32 signext %n) { ; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[VEC_EPILOG_RESUME_VAL]] to i32 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i32> [[DOTSPLAT]], ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT7:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i32> [[VEC_IND7]] to <4 x i8> ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX6]] ; CHECK-NEXT: store <4 x i8> [[TMP7]], ptr [[TMP8]], align 1 ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <4 x i32> [[VEC_IND7]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT7]] = add nuw nsw <4 x i32> [[VEC_IND7]], splat (i32 4) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP10]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: @@ -720,7 +720,7 @@ define void @f4(ptr noalias %A, i32 signext %n) { ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <4 x i8> [[TMP1]], ptr [[TMP2]], align 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK-PROFITABLE-BY-DEFAULT: [[MIDDLE_BLOCK]]: @@ -736,16 +736,16 @@ define void @f4(ptr noalias %A, i32 signext %n) { ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP5:%.*]] = trunc i64 [[VEC_EPILOG_RESUME_VAL]] to i32 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i64 0 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-PROFITABLE-BY-DEFAULT: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND7:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND7:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT7:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP7:%.*]] = trunc <2 x i32> [[VEC_IND7]] to <2 x i8> ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX6]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <2 x i8> [[TMP7]], ptr [[TMP8]], align 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], splat (i32 2) +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT7]] = add nuw nsw <2 x i32> [[VEC_IND7]], splat (i32 2) ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP10]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-PROFITABLE-BY-DEFAULT: [[VEC_EPILOG_MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll index f9f7feb7bdfbc..f57041257c919 100644 --- a/llvm/test/Transforms/LoopVectorize/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/optsize.ll @@ -571,7 +571,7 @@ define i32 @pr45526_pgso() !prof !14 { ; NPGSO-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; NPGSO-NEXT: [[TMP0:%.*]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 1) ; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; NPGSO-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; NPGSO-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; NPGSO-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 508 ; NPGSO-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; NPGSO: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll b/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll index 8a77d14b9d3ba..f7986d8f1f928 100644 --- a/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll +++ b/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll @@ -40,7 +40,7 @@ define void @inner_latch_header_first_successor(i64 %N, i32 %c, i64 %M) { ; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x i64> [ [[TMP3]], %[[INNER3]] ] ; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[TMP10]], <4 x ptr> align 4 [[TMP0]], <4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -142,7 +142,7 @@ define void @inner_latch_header_second_successor(i64 %N, i32 %c, i64 %M) { ; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i64> [ [[TMP3]], %[[INNER3]] ] ; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[TMP9]], <4 x ptr> align 4 [[TMP0]], <4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll b/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll index 2e17f7adca279..32dbc273cb6b6 100644 --- a/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll +++ b/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll @@ -42,7 +42,7 @@ define void @test(ptr %src, i64 %n) { ; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_1_LATCH5]], label [[LOOP_2_HEADER1]] ; CHECK: vector.latch: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/outer-loop-wide-phis.ll b/llvm/test/Transforms/LoopVectorize/outer-loop-wide-phis.ll index 59e3d71f27a38..afd5000818bfc 100644 --- a/llvm/test/Transforms/LoopVectorize/outer-loop-wide-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/outer-loop-wide-phis.ll @@ -36,7 +36,7 @@ define void @wide_phi_2_predecessors(ptr noalias %A, ptr noalias %B, i32 %c, i1 ; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x i64> [ [[TMP3]], %[[INNER_LATCH3]] ] ; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[TMP10]], <4 x ptr> align 8 [[TMP0]], <4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -144,7 +144,7 @@ define void @wide_phi_2_predecessors_phi_ops_swapped(ptr noalias %A, ptr noalias ; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x i64> [ [[TMP3]], %[[INNER_LATCH3]] ] ; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[TMP10]], <4 x ptr> align 8 [[TMP0]], <4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll index b2f1954ca989b..020f84209b23b 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll @@ -53,7 +53,7 @@ define void @non_outermost_loop_hcfg_construction(i64 %n, ptr %a) { ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_LOOP_LATCH4]], label [[INNERMOST_LOOP3]] ; CHECK: vector.latch: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -196,7 +196,7 @@ define void @non_outermost_loop_hcfg_construction_other_loops_at_same_level(i64 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_LOOP_J0_CLEANUP4]], label [[INNERMOST_LOOP3]] ; CHECK: vector.latch: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll index 47743753349ed..b224a5a86b83d 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll @@ -25,8 +25,8 @@ define void @foo() { ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul [[TMP4]], splat (i64 1) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw nsw [[TMP4]], splat (i64 1) +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP5]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -50,7 +50,7 @@ define void @foo() { ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi [ [[TMP12]], [[INNER_LOOP1]] ] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[VEC_PHI5]], align 4 [[TMP10]], splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll index 4086c79082cce..0a19e0ec55a4b 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll @@ -48,7 +48,7 @@ define void @foo(i32 %n) { ; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_LATCH]], label %[[FOR_BODY31]] ; CHECK: [[VECTOR_LATCH]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll index fb9b1c7d62e3e..5fcd6db2eb28a 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll @@ -50,7 +50,7 @@ ; CHECK: [[ForInc]]: ; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 4 -; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], splat (i64 4) +; CHECK: %[[VecIndNext]] = add nuw nsw <4 x i64> %[[VecInd]], splat (i64 4) ; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], {{.*}} ; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll index c491477c4d2be..623a9435edec1 100644 --- a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll +++ b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll @@ -71,9 +71,9 @@ define void @doit1(i32 %n, i32 %step) local_unnamed_addr { ; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = mul <4 x i32> , [[DOTSPLAT]] -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP17]] -; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[STEP]], 4 +; CHECK-NEXT: [[TMP19:%.*]] = mul nsw <4 x i32> , [[DOTSPLAT]] +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP19]] +; CHECK-NEXT: [[TMP18:%.*]] = mul nsw i32 [[STEP]], 4 ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP18]], i64 0 ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -83,7 +83,7 @@ define void @doit1(i32 %n, i32 %step) local_unnamed_addr { ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [250 x i32], ptr @a, i64 0, i64 [[INDEX]] ; CHECK-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP20]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], [[DOTSPLAT3]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[VEC_IND]], [[DOTSPLAT3]] ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -192,9 +192,9 @@ define void @doit2(i32 %n, i32 %step) local_unnamed_addr { ; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP16:%.*]] = mul <4 x i32> , [[DOTSPLAT]] -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP16]] -; CHECK-NEXT: [[TMP17:%.*]] = mul i32 [[STEP]], 4 +; CHECK-NEXT: [[TMP18:%.*]] = mul nsw <4 x i32> , [[DOTSPLAT]] +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP18]] +; CHECK-NEXT: [[TMP17:%.*]] = mul nsw i32 [[STEP]], 4 ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP17]], i64 0 ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -204,7 +204,7 @@ define void @doit2(i32 %n, i32 %step) local_unnamed_addr { ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [250 x i32], ptr @a, i64 0, i64 [[INDEX]] ; CHECK-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP19]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], [[DOTSPLAT3]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[VEC_IND]], [[DOTSPLAT3]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -385,9 +385,9 @@ define void @doit4(i32 %n, i8 signext %cstep) local_unnamed_addr { ; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[CONV]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[CONV]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = mul <4 x i32> , [[DOTSPLAT]] -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP14]] -; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[CONV]], 4 +; CHECK-NEXT: [[TMP16:%.*]] = mul nsw <4 x i32> , [[DOTSPLAT]] +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP16]] +; CHECK-NEXT: [[TMP15:%.*]] = mul nsw i32 [[CONV]], 4 ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i64 0 ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -397,7 +397,7 @@ define void @doit4(i32 %n, i8 signext %cstep) local_unnamed_addr { ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [250 x i32], ptr @a, i64 0, i64 [[INDEX]] ; CHECK-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP17]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], [[DOTSPLAT3]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[VEC_IND]], [[DOTSPLAT3]] ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/pr34681.ll b/llvm/test/Transforms/LoopVectorize/pr34681.ll index 0f509a5c4eeb3..a04a4e9eea6fe 100644 --- a/llvm/test/Transforms/LoopVectorize/pr34681.ll +++ b/llvm/test/Transforms/LoopVectorize/pr34681.ll @@ -80,7 +80,7 @@ define i32 @foo1(i32 %N, ptr nocapture readnone %A, ptr nocapture readonly %B, i ; CHECK-NEXT: [[TMP28:%.*]] = sext <4 x i16> [[TMP27]] to <4 x i32> ; CHECK-NEXT: [[TMP29]] = add <4 x i32> [[VEC_PHI]], [[TMP28]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP30]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -185,7 +185,7 @@ define i32 @foo2(i16 zeroext %N, ptr nocapture readnone %A, ptr nocapture readon ; CHECK-NEXT: [[TMP25:%.*]] = sext <4 x i16> [[TMP24]] to <4 x i32> ; CHECK-NEXT: [[TMP26]] = add <4 x i32> [[VEC_PHI]], [[TMP25]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP27]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/pr35773.ll b/llvm/test/Transforms/LoopVectorize/pr35773.ll index b7165156e28fd..00b9e9133af9b 100644 --- a/llvm/test/Transforms/LoopVectorize/pr35773.ll +++ b/llvm/test/Transforms/LoopVectorize/pr35773.ll @@ -17,8 +17,8 @@ define void @doit1(ptr %ptr) { ; CHECK-NEXT: store <4 x i32> [[I32_IV]], ptr [[GEP1]], align 4 ; CHECK-NEXT: [[MAIN_IV_NEXT]] = add nuw i32 [[MAIN_IV]], 4 -; CHECK-NEXT: [[I32_IV_NEXT]] = add <4 x i32> [[I32_IV]], splat (i32 36) -; CHECK-NEXT: [[IV_FROM_TRUNC_NEXT]] = add <4 x i8> [[IV_FROM_TRUNC]], splat (i8 36) +; CHECK-NEXT: [[I32_IV_NEXT]] = add nuw nsw <4 x i32> [[I32_IV]], splat (i32 36) +; CHECK-NEXT: [[IV_FROM_TRUNC_NEXT]] = add nuw nsw <4 x i8> [[IV_FROM_TRUNC]], splat (i8 36) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[MAIN_IV_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 ; diff --git a/llvm/test/Transforms/LoopVectorize/pr36983-multiple-lcssa.ll b/llvm/test/Transforms/LoopVectorize/pr36983-multiple-lcssa.ll index 98963a72c5ad0..310c7729a6b63 100644 --- a/llvm/test/Transforms/LoopVectorize/pr36983-multiple-lcssa.ll +++ b/llvm/test/Transforms/LoopVectorize/pr36983-multiple-lcssa.ll @@ -14,7 +14,7 @@ define i16 @duplicate_lcssa(i16 %val) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = sub nsw <4 x i16> [[VEC_IND]], splat (i16 1) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 -4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -4) ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536 ; CHECK-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll b/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll index 1bb6454cdeea2..c4a5a8b18dd5d 100644 --- a/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll +++ b/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll @@ -72,12 +72,13 @@ define void @scev4stride1(ptr noalias nocapture %a, ptr noalias nocapture readon ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]] ; CHECK-NEXT: store <4 x i32> [[TMP20]], ptr [[TMP21]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll index ebd532aa5032c..f17866718a0ee 100644 --- a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll +++ b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll @@ -27,7 +27,7 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 { ; CHECK-NEXT: [[PREDPHI6:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[TMP0]], <2 x i32> [[TMP3]] ; CHECK-NEXT: [[PREDPHI7]] = select i1 [[C_2]], <2 x i32> [[VEC_PHI]], <2 x i32> [[PREDPHI6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 176 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll index cf973affae5f2..5654dbd727f85 100644 --- a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll +++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll @@ -224,7 +224,7 @@ define void @predicated_phi_dbg(i64 %n, ptr %x) { ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[X]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP21]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -304,7 +304,7 @@ define void @predicated_phi_dbg(i64 %n, ptr %x) { ; DEBUGLOC-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[X]], i64 [[INDEX]], !dbg [[DBG57:![0-9]+]] ; DEBUGLOC-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP21]], align 8, !dbg [[DBG58:![0-9]+]] ; DEBUGLOC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4, !dbg [[DBG53]] -; DEBUGLOC-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4), !dbg [[DBG53]] +; DEBUGLOC-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4), !dbg [[DBG53]] ; DEBUGLOC-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]], !dbg [[DBG59:![0-9]+]] ; DEBUGLOC-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !dbg [[DBG59]], !llvm.loop [[LOOP60:![0-9]+]] ; DEBUGLOC: [[MIDDLE_BLOCK]]: @@ -385,7 +385,7 @@ define void @scalar_cast_dbg(ptr nocapture %a, i32 %start, i64 %k) { ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP5]] ; CHECK-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -428,7 +428,7 @@ define void @scalar_cast_dbg(ptr nocapture %a, i32 %start, i64 %k) { ; DEBUGLOC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP5]], !dbg [[DBG77:![0-9]+]] ; DEBUGLOC-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP6]], align 4, !dbg [[DBG78:![0-9]+]] ; DEBUGLOC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4, !dbg [[DBG75]] -; DEBUGLOC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4), !dbg [[DBG76]] +; DEBUGLOC-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4), !dbg [[DBG76]] ; DEBUGLOC-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]], !dbg [[DBG79:![0-9]+]] ; DEBUGLOC-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !dbg [[DBG79]], !llvm.loop [[LOOP80:![0-9]+]] ; DEBUGLOC: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll index 5f54b0ac7834a..caf1a934ed70a 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll @@ -28,7 +28,7 @@ define i8 @PR34687(i1 %c, i32 %x, i32 %n, i32 %divisor) { ; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i8> ; CHECK-NEXT: [[TMP4]] = zext <4 x i8> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll index c708715c623e6..dfdf1100eb57b 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll @@ -365,7 +365,7 @@ define void @reduc_store_inside_unrolled(ptr %dst, ptr readonly %src) { ; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP29]], i32 3 ; CHECK-NEXT: [[TMP34]] = add <4 x i32> [[TMP33]], [[TMP16]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; CHECK-NEXT: br i1 [[TMP35]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -571,7 +571,7 @@ define void @reduc_store_middle_store_predicated(ptr %dst, ptr readonly %src) { ; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP29]], i32 3 ; CHECK-NEXT: [[TMP34]] = add <4 x i32> [[TMP33]], [[TMP16]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; CHECK-NEXT: br i1 [[TMP35]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll index c858f201e01fa..d140bc09fe731 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll @@ -161,8 +161,8 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP7]], splat (i64 1) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; CHECK-NEXT: [[TMP8:%.*]] = mul nuw nsw [[TMP7]], splat (i64 1) +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP8]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -187,7 +187,7 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-NEXT: store [[TMP15]], ptr [[TMP17]], align 4 ; CHECK-NEXT: store [[TMP16]], ptr [[TMP20]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[STEP_ADD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll index 359132f7904cc..d87d39e684993 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll @@ -43,7 +43,7 @@ define void @add_ind64_unrolled(ptr noalias nocapture %a, ptr noalias nocapture ; CHECK-NEXT: store [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store [[TMP13]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[STEP_ADD]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -119,7 +119,7 @@ define void @add_ind64_unrolled_nxv1i64(ptr noalias nocapture %a, ptr noalias no ; CHECK-NEXT: store [[TMP11]], ptr [[TMP13]], align 8 ; CHECK-NEXT: store [[TMP12]], ptr [[TMP15]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[STEP_ADD]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: @@ -184,9 +184,9 @@ define void @add_unique_ind32(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 ; CHECK-NEXT: [[IND_END:%.*]] = shl i32 [[DOTCAST]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i32() -; CHECK-NEXT: [[TMP7:%.*]] = shl [[TMP6]], splat (i32 1) +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw [[TMP6]], splat (i32 1) ; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP3]] to i32 -; CHECK-NEXT: [[TMP9:%.*]] = shl i32 [[TMP8]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i32 [[TMP8]], 1 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP9]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -196,7 +196,7 @@ define void @add_unique_ind32(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store [[VEC_IND]], ptr [[TMP10]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll index cde2de73b7bfd..70127a6762423 100644 --- a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll @@ -25,7 +25,7 @@ define void @single_incoming_phi_no_blend_mask(i64 %a, i64 %b) { ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[INDEX]] ; CHECK-NEXT: store <2 x i16> [[PREDPHI]], ptr [[TMP6]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -87,7 +87,7 @@ define void @single_incoming_phi_with_blend_mask(i64 %a, i64 %b) { ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[INDEX]] ; CHECK-NEXT: store <2 x i16> [[PREDPHI1]], ptr [[TMP9]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: @@ -152,9 +152,9 @@ define void @multiple_incoming_phi_with_blend_mask(i64 %a, ptr noalias %dst) { ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[DST:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x i16> [[TMP9]], ptr [[TMP10]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) -; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], splat (i16 2) -; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i16> [[VEC_IND3]], splat (i16 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT2]] = add nuw nsw <2 x i16> [[VEC_IND1]], splat (i16 2) +; CHECK-NEXT: [[VEC_IND_NEXT4]] = add nuw nsw <2 x i16> [[VEC_IND3]], splat (i16 2) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -231,7 +231,7 @@ define void @single_incoming_needs_predication(i64 %a, i64 %b) { ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[INDEX]] ; CHECK-NEXT: store <2 x i16> [[PREDPHI3]], ptr [[TMP18]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: @@ -283,7 +283,7 @@ define void @duplicated_incoming_blocks_blend(i32 %x, ptr %ptr) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i32 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/uitofp-preserve-nneg.ll b/llvm/test/Transforms/LoopVectorize/uitofp-preserve-nneg.ll index b85f2746a0b14..98203a245f863 100644 --- a/llvm/test/Transforms/LoopVectorize/uitofp-preserve-nneg.ll +++ b/llvm/test/Transforms/LoopVectorize/uitofp-preserve-nneg.ll @@ -18,7 +18,7 @@ define void @uitofp_preserve_nneg(ptr %result, i32 %size, float %y) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[RESULT:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX1]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll index 3b515a2acb1a7..24dc182fe24a1 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll @@ -114,7 +114,7 @@ define void @blend_chain_iv(i1 %c) { ; CHECK-NEXT: store i16 0, ptr [[TMP6]], align 2 ; CHECK-NEXT: store i16 0, ptr [[TMP8]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[PREDPHI1]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[PREDPHI1]], splat (i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll index 927fefc73ceea..f80d7b695e2af 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll @@ -112,7 +112,7 @@ define void @ld_div3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP10]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -168,7 +168,7 @@ define void @ld_div1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: @@ -272,7 +272,7 @@ define void @ld_div3_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: @@ -328,7 +328,7 @@ define void @ld_div1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -383,7 +383,7 @@ define void @ld_div2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -520,7 +520,7 @@ define void @ld_div2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP10]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: @@ -569,7 +569,7 @@ define void @ld_div3_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP10]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: @@ -625,7 +625,7 @@ define void @ld_div1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: @@ -729,7 +729,7 @@ define void @ld_div3_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: @@ -785,7 +785,7 @@ define void @ld_div1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: @@ -841,7 +841,7 @@ define void @ld_div2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK: middle.block: @@ -937,7 +937,7 @@ define void @test_step_is_not_invariant(ptr %A) { ; CHECK-NEXT: store i16 [[TMP1]], ptr [[TMP9]], align 2 ; CHECK-NEXT: store i16 [[TMP2]], ptr [[TMP10]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 56 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll index d6277d657ea7e..0f191b2d8a278 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll @@ -112,7 +112,7 @@ define void @ld_and_neg3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP10]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -168,7 +168,7 @@ define void @ld_and_neg1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: @@ -270,7 +270,7 @@ define void @ld_and_neg1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: @@ -325,7 +325,7 @@ define void @ld_and_neg2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: @@ -374,7 +374,7 @@ define void @ld_and_neg2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP10]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: @@ -430,7 +430,7 @@ define void @ld_and_neg2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: middle.block: @@ -486,7 +486,7 @@ define void @ld_and_neg2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: middle.block: @@ -542,7 +542,7 @@ define void @ld_and_neg3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll index edf04bbcbcdff..7ff10c544f72a 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll @@ -53,7 +53,7 @@ define void @ld_div2_urem3_1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: store <8 x i64> [[TMP34]], ptr [[TMP35]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <8 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -129,7 +129,7 @@ define void @ld_div2_urem3_2(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: store <8 x i64> [[TMP35]], ptr [[TMP36]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <8 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: @@ -203,7 +203,7 @@ define void @ld_div4(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: store <8 x i64> [[TMP33]], ptr [[TMP34]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <8 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll index 32873a4e90e81..a5bb07f1fd4ef 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll @@ -124,7 +124,7 @@ define void @ld_lshr1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; VF4-NEXT: store <4 x i64> [[TMP17]], ptr [[TMP18]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; VF4-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF4-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; VF4: middle.block: @@ -248,7 +248,7 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) ; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; VF2: middle.block: @@ -301,7 +301,7 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8 ; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8) ; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; VF4: middle.block: @@ -442,7 +442,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VF2: middle.block: @@ -494,7 +494,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8 ; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VF4: middle.block: @@ -549,7 +549,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; VF2: middle.block: @@ -601,7 +601,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8 ; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; VF4: middle.block: @@ -651,7 +651,7 @@ define void @ld_lshr1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP10]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; VF2-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; VF2-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; VF2: middle.block: @@ -689,7 +689,7 @@ define void @ld_lshr1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: store <4 x i64> [[TMP17]], ptr [[TMP18]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; VF4-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 ; VF4-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; VF4: middle.block: @@ -830,7 +830,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; VF2: middle.block: @@ -883,7 +883,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; VF4: middle.block: @@ -939,7 +939,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; VF2: middle.block: @@ -992,7 +992,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; VF4: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll index 607d1365098f2..fb962c017156d 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll @@ -29,8 +29,8 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; VF2-NEXT: store <2 x i64> [[TMP11]], ptr [[TMP12]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF2: middle.block: @@ -71,8 +71,8 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; VF4-NEXT: store <4 x i64> [[TMP19]], ptr [[TMP20]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF4: middle.block: @@ -162,8 +162,8 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; VF4-NEXT: store <4 x i64> [[TMP19]], ptr [[TMP20]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; VF4: middle.block: @@ -219,8 +219,8 @@ define void @ld_div3_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; VF2-NEXT: store <2 x i64> [[TMP11]], ptr [[TMP12]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF2: middle.block: @@ -261,8 +261,8 @@ define void @ld_div3_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; VF4-NEXT: store <4 x i64> [[TMP19]], ptr [[TMP20]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF4: middle.block: @@ -325,8 +325,8 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; VF2: middle.block: @@ -382,8 +382,8 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; VF4: middle.block: @@ -446,8 +446,8 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF2: middle.block: @@ -503,8 +503,8 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF4: middle.block: @@ -567,8 +567,8 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VF2: middle.block: @@ -624,8 +624,8 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VF4: middle.block: @@ -688,8 +688,8 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VF2: middle.block: @@ -744,8 +744,8 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VF4: middle.block: @@ -807,8 +807,8 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; VF2: middle.block: @@ -863,8 +863,8 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; VF4: middle.block: @@ -926,8 +926,8 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VF2: middle.block: @@ -982,8 +982,8 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VF4: middle.block: @@ -1039,8 +1039,8 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: store <2 x i64> [[TMP11]], ptr [[TMP12]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; VF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; VF2: middle.block: @@ -1081,8 +1081,8 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: store <4 x i64> [[TMP19]], ptr [[TMP20]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 ; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; VF4: middle.block: @@ -1138,8 +1138,8 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: store <2 x i64> [[TMP11]], ptr [[TMP12]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; VF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; VF2: middle.block: @@ -1180,8 +1180,8 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: store <4 x i64> [[TMP19]], ptr [[TMP20]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 ; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; VF4: middle.block: @@ -1237,8 +1237,8 @@ define void @ld_div3_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: store <2 x i64> [[TMP11]], ptr [[TMP12]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; VF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; VF2: middle.block: @@ -1279,8 +1279,8 @@ define void @ld_div3_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: store <4 x i64> [[TMP19]], ptr [[TMP20]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 ; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; VF4: middle.block: @@ -1343,8 +1343,8 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; VF2: middle.block: @@ -1400,8 +1400,8 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP32]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; VF4: middle.block: @@ -1464,8 +1464,8 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; VF2: middle.block: @@ -1521,8 +1521,8 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP32]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; VF4: middle.block: @@ -1585,8 +1585,8 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; VF2: middle.block: @@ -1642,8 +1642,8 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP32]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; VF4: middle.block: @@ -1706,8 +1706,8 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; VF2: middle.block: @@ -1763,8 +1763,8 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP32]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; VF4: middle.block: @@ -1827,8 +1827,8 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; VF2: middle.block: @@ -1884,8 +1884,8 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP32]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; VF4: middle.block: @@ -1948,8 +1948,8 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; VF2: middle.block: @@ -2005,8 +2005,8 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP32]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; VF4: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/vector-geps.ll b/llvm/test/Transforms/LoopVectorize/vector-geps.ll index 94bc32205ec11..90b4be0f96dce 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-geps.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-geps.ll @@ -20,7 +20,7 @@ define void @vector_gep_stored(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x ptr> [[TMP0]], ptr [[TMP1]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll b/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll index eaebfebf533ea..909ca14f21639 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll @@ -41,7 +41,7 @@ define void @inner_loop_reduction(ptr noalias nocapture readonly %a.in, ptr noal ; CHECK-NEXT: %[[C_PTR:.*]] = getelementptr inbounds double, ptr %c.out, <4 x i64> %[[VEC_INDEX]] ; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %[[REDUCTION]], <4 x ptr> align 8 %[[C_PTR]], <4 x i1> splat (i1 true)) ; CHECK-NEXT: %[[FOR1_INDEX_NEXT:.*]] = add nuw i64 %[[FOR1_INDEX]], 4 -; CHECK-NEXT: %{{.*}} = add <4 x i64> %[[VEC_INDEX]], splat (i64 4) +; CHECK-NEXT: %{{.*}} = add nuw nsw <4 x i64> %[[VEC_INDEX]], splat (i64 4) ; CHECK-NEXT: %[[EXIT_COND:.*]] = icmp eq i64 %[[FOR1_INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 %[[EXIT_COND]], label %{{.*}}, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll b/llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll index 180fd84c14450..72ba993defb5a 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll @@ -34,7 +34,7 @@ define void @widen_call_instruction(ptr noalias nocapture readonly %a.in, ptr no ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[C_OUT]], <4 x i64> [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP9]], <4 x ptr> align 8 [[TMP7]], <4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll b/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll index 48a11fa9182e7..429e964890103 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll @@ -36,7 +36,7 @@ define void @loop_invariant_select(ptr noalias nocapture %out, i1 %select, doubl ; CHECK-NEXT: br i1 [[TMP5]], label %[[VECTOR_LATCH]], label %[[FOR2_HEADER3]] ; CHECK: [[VECTOR_LATCH]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -96,7 +96,7 @@ define void @outer_loop_dependant_select(ptr noalias nocapture %out, double %a, ; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_LATCH]], label %[[FOR2_HEADER3]] ; CHECK: [[VECTOR_LATCH]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -157,7 +157,7 @@ define void @inner_loop_dependant_select(ptr noalias nocapture %out, double %a, ; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_LATCH]], label %[[FOR2_HEADER3]] ; CHECK: [[VECTOR_LATCH]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -219,7 +219,7 @@ define void @outer_and_inner_loop_dependant_select(ptr noalias nocapture %out, d ; CHECK-NEXT: br i1 [[TMP7]], label %[[VECTOR_LATCH]], label %[[FOR2_HEADER3]] ; CHECK: [[VECTOR_LATCH]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll index d08ca8c99e8ba..9bb010c0431d8 100644 --- a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll +++ b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll @@ -102,7 +102,7 @@ define void @wide_gep_multiple_indices_some_invariant(ptr noalias %dst, ptr noal ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x ptr> [[TMP1]], ptr [[TMP2]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: