diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index a63956c0cba6b..7c13c45459dfa 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7607,7 +7607,10 @@ VPWidenMemoryRecipe *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI, Ptr, &Plan.getVF(), getLoadStoreType(I), /*Stride*/ -1, Flags, VPI->getDebugLoc()); } else { - VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I), + const DataLayout &DL = I->getDataLayout(); + VPValue *Offset = Plan.getConstantInt( + DL.getIndexType(Ptr->getUnderlyingValue()->getType()), 0); + VectorPtr = new VPVectorPointerRecipe(Ptr, Offset, getLoadStoreType(I), GEP ? GEP->getNoWrapFlags() : GEPNoWrapFlags::none(), VPI->getDebugLoc()); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 0c7d9c0193a03..aceb38b9f309a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1964,20 +1964,22 @@ class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags, #endif }; -/// A recipe to compute the pointers for widened memory accesses of IndexTy. -class VPVectorPointerRecipe : public VPRecipeWithIRFlags, - public VPUnrollPartAccessor<1> { +/// A recipe to compute the pointers for widened memory accesses of +/// SourceElementTy. +class VPVectorPointerRecipe : public VPRecipeWithIRFlags { Type *SourceElementTy; public: - VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, + VPVectorPointerRecipe(VPValue *Ptr, VPValue *Offset, Type *SourceElementTy, GEPNoWrapFlags GEPFlags, DebugLoc DL) - : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef(Ptr), - GEPFlags, DL), + : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, {Ptr, Offset}, GEPFlags, + DL), SourceElementTy(SourceElementTy) {} VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC) + VPValue *getOffset() { return getOperand(1); } + void execute(VPTransformState &State) override; Type *getSourceElementType() const { return SourceElementTy; } @@ -1997,14 +1999,11 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags, } VPVectorPointerRecipe *clone() override { - return new VPVectorPointerRecipe(getOperand(0), SourceElementTy, - getGEPNoWrapFlags(), getDebugLoc()); + return new VPVectorPointerRecipe(getOperand(0), getOffset(), + SourceElementTy, getGEPNoWrapFlags(), + getDebugLoc()); } - /// Return true if this VPVectorPointerRecipe corresponds to part 0. Note that - /// this is only accurate after the VPlan has been unrolled. - bool isFirstPart() const { return getUnrollPart(*this) == 0; } - /// Return the cost of this VPHeaderPHIRecipe. 
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override { diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 1c88b56ca89dc..4634b17f6b480 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2632,15 +2632,13 @@ void VPVectorEndPointerRecipe::printRecipe(raw_ostream &O, const Twine &Indent, void VPVectorPointerRecipe::execute(VPTransformState &State) { auto &Builder = State.Builder; - unsigned CurrentPart = getUnrollPart(*this); - const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout(); - Type *IndexTy = DL.getIndexType(State.TypeAnalysis.inferScalarType(this)); Value *Ptr = State.get(getOperand(0), VPLane(0)); - - Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart); - Value *ResultPtr = Builder.CreateGEP(getSourceElementType(), Ptr, Increment, - "", getGEPNoWrapFlags()); - + Value *Step = State.get(getOffset(), true); + if (auto *C = dyn_cast(Step)) + if (C->isZero()) + return State.set(this, Ptr, /*IsScalar=*/true); + Value *ResultPtr = Builder.CreateGEP(getSourceElementType(), Ptr, Step, "", + getGEPNoWrapFlags()); State.set(this, ResultPtr, /*IsScalar*/ true); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 9174058baad65..7465982fac9da 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1381,14 +1381,6 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) { } } - // VPVectorPointer for part 0 can be replaced by their start pointer. - if (auto *VecPtr = dyn_cast(Def)) { - if (VecPtr->isFirstPart()) { - VecPtr->replaceAllUsesWith(VecPtr->getOperand(0)); - return; - } - } - // VPScalarIVSteps for part 0 can be replaced by their start value, if only // the first lane is demanded. if (auto *Steps = dyn_cast(Def)) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index 4af00f986aab0..abafe50b947ba 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -298,6 +298,22 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) { Copy->setOperand(1, getValueForPart(Op, Part)); continue; } + if (auto *VPR = dyn_cast(&R)) { + VPBuilder Builder(VPR); + auto *Prev = cast(getValueForPart(VPR, Part - 1)) + ->getOperand(1); + VPValue *Increment = &Plan.getVF(); + Type *IncTy = TypeInfo.inferScalarType(Increment); + Increment = Builder.createScalarZExtOrTrunc( + Increment, TypeInfo.inferScalarType(Prev), IncTy, + DebugLoc::getCompilerGenerated()); + VPIRFlags Flags = VPIRFlags::WrapFlagsTy(true, true); + VPInstruction *Add = Builder.createNaryOp( + Instruction::Add, {Prev, Increment}, Flags, VPR->getDebugLoc()); + Copy->setOperand(0, VPR->getOperand(0)); + Copy->setOperand(1, Add); + continue; + } if (auto *Red = dyn_cast(&R)) { auto *Phi = dyn_cast(R.getOperand(0)); if (Phi && Phi->isOrdered()) { @@ -315,12 +331,12 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) { // Add operand indicating the part to generate code for, to recipes still // requiring it. 
if (isa(Copy) || + VPVectorEndPointerRecipe>(Copy) || match(Copy, m_VPInstruction())) Copy->addOperand(getConstantInt(Part)); - if (isa(R)) + if (isa(R)) Copy->setOperand(0, R.getOperand(0)); } } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index 72e813b62025f..3227862c7350f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -21,7 +21,8 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) { ; CHECK-NEXT: br i1 [[TMP7]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP8]], 2 +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP11]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP9]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP18:%.*]] = sdiv i64 [[M]], [[CONV6]] @@ -36,9 +37,7 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) { ; CHECK-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP26]] ; CHECK-NEXT: [[TMP32:%.*]] = sext i32 [[TMP30]] to i64 ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP32]] -; CHECK-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP38:%.*]] = shl nuw i64 [[TMP37]], 1 -; CHECK-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP34]], i64 [[TMP38]] +; CHECK-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP34]], i64 [[TMP11]] ; CHECK-NEXT: store zeroinitializer, ptr [[TMP34]], align 8 ; CHECK-NEXT: store zeroinitializer, ptr [[TMP39]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index 4b097ba2422e4..34bba684b3bb1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -30,7 +30,8 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK3]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 16 +; DEFAULT-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP9]], 8 +; DEFAULT-NEXT: [[TMP10:%.*]] = mul i64 [[TMP13]], 2 ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP10]] ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X]], i64 0 @@ -40,9 +41,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; DEFAULT: [[VECTOR_BODY]]: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 3 -; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP12]], i64 [[TMP14]] +; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP12]], i64 [[TMP13]] ; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 1 ; DEFAULT-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP15]], 
align 1 ; DEFAULT-NEXT: [[TMP16:%.*]] = zext [[WIDE_LOAD]] to @@ -56,9 +55,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; DEFAULT-NEXT: [[TMP24:%.*]] = trunc [[TMP22]] to ; DEFAULT-NEXT: [[TMP25:%.*]] = trunc [[TMP23]] to ; DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP28:%.*]] = shl nuw i64 [[TMP27]], 3 -; DEFAULT-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[TMP26]], i64 [[TMP28]] +; DEFAULT-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[TMP26]], i64 [[TMP13]] ; DEFAULT-NEXT: store [[TMP24]], ptr [[TMP26]], align 1 ; DEFAULT-NEXT: store [[TMP25]], ptr [[TMP29]], align 1 ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll index 53cb0653fd241..78549ffeeb3de 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll @@ -144,7 +144,8 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) { ; INTERLEAVE-4-VLA-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; INTERLEAVE-4-VLA: vector.ph: ; INTERLEAVE-4-VLA-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-4-VLA-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; INTERLEAVE-4-VLA-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4 +; INTERLEAVE-4-VLA-NEXT: [[TMP3:%.*]] = mul i64 [[TMP5]], 4 ; INTERLEAVE-4-VLA-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; INTERLEAVE-4-VLA-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; INTERLEAVE-4-VLA-NEXT: br label [[VECTOR_BODY:%.*]] @@ -155,14 +156,10 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) { ; INTERLEAVE-4-VLA-NEXT: [[VEC_PHI3:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-4-VLA-NEXT: [[VEC_PHI4:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-4-VLA-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]] -; INTERLEAVE-4-VLA-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-4-VLA-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 -; INTERLEAVE-4-VLA-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP6]] -; INTERLEAVE-4-VLA-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-4-VLA-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 3 +; INTERLEAVE-4-VLA-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP5]], [[TMP5]] +; INTERLEAVE-4-VLA-NEXT: [[TMP12:%.*]] = add nuw nsw i64 [[TMP9]], [[TMP5]] +; INTERLEAVE-4-VLA-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP5]] ; INTERLEAVE-4-VLA-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP9]] -; INTERLEAVE-4-VLA-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-4-VLA-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 12 ; INTERLEAVE-4-VLA-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP12]] ; INTERLEAVE-4-VLA-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 1 ; INTERLEAVE-4-VLA-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP7]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index 31453e9509ea3..d8cc8167f00af 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ 
b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -37,7 +37,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[N_VEC]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; TFNONE-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] ; TFNONE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8 -; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR3:[0-9]+]] +; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR4:[0-9]+]] ; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] ; TFNONE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8 ; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 @@ -72,7 +72,8 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]: ; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP3]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP9]], 2 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) @@ -82,17 +83,13 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ splat (i1 true), %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP7]], i64 [[TMP9]] ; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr align 8 [[TMP7]], [[ACTIVE_LANE_MASK]], poison) ; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr align 8 [[TMP10]], [[ACTIVE_LANE_MASK2]], poison) ; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) ; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD3]], [[ACTIVE_LANE_MASK2]]) ; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 1 -; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP15]] +; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP9]] ; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP11]], ptr align 8 [[TMP13]], [[ACTIVE_LANE_MASK]]) ; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP12]], ptr align 8 [[TMP16]], [[ACTIVE_LANE_MASK2]]) ; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] @@ -160,7 +157,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP12]], 50 ; TFNONE-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], 
label %[[IF_END]] ; TFNONE: [[IF_THEN]]: -; TFNONE-NEXT: [[TMP9:%.*]] = call i64 @foo(i64 [[TMP12]]) #[[ATTR3]] +; TFNONE-NEXT: [[TMP9:%.*]] = call i64 @foo(i64 [[TMP12]]) #[[ATTR4]] ; TFNONE-NEXT: br label %[[IF_END]] ; TFNONE: [[IF_END]]: ; TFNONE-NEXT: [[TMP14:%.*]] = phi i64 [ [[TMP9]], %[[IF_THEN]] ], [ 0, %[[FOR_BODY]] ] @@ -201,7 +198,8 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { ; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]: ; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP3]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP9]], 2 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) @@ -211,8 +209,6 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ splat (i1 true), %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT5:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 [[TMP9]] ; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr align 8 [[TMP7]], [[ACTIVE_LANE_MASK]], poison) ; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr align 8 [[TMP10]], [[ACTIVE_LANE_MASK2]], poison) @@ -225,9 +221,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select [[TMP11]], [[TMP15]], zeroinitializer ; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select [[TMP12]], [[TMP16]], zeroinitializer ; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 1 -; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP19]] +; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP9]] ; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr align 8 [[TMP17]], [[ACTIVE_LANE_MASK]]) ; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI4]], ptr align 8 [[TMP20]], [[ACTIVE_LANE_MASK2]]) ; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] @@ -308,10 +302,10 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 50 ; TFNONE-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]] ; TFNONE: [[IF_THEN]]: -; TFNONE-NEXT: [[TMP14:%.*]] = call i64 @foo(i64 [[TMP13]]) #[[ATTR4:[0-9]+]] +; TFNONE-NEXT: [[TMP14:%.*]] = call i64 @foo(i64 [[TMP13]]) #[[ATTR5:[0-9]+]] ; TFNONE-NEXT: br label %[[IF_END]] ; TFNONE: [[IF_ELSE]]: -; TFNONE-NEXT: [[TMP15:%.*]] = call i64 @foo(i64 0) #[[ATTR4]] +; TFNONE-NEXT: [[TMP15:%.*]] = 
call i64 @foo(i64 0) #[[ATTR5]] ; TFNONE-NEXT: br label %[[IF_END]] ; TFNONE: [[IF_END]]: ; TFNONE-NEXT: [[TMP16:%.*]] = phi i64 [ [[TMP14]], %[[IF_THEN]] ], [ [[TMP15]], %[[IF_ELSE]] ] @@ -355,7 +349,8 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { ; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]: ; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP3]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP9]], 2 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) @@ -365,8 +360,6 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ splat (i1 true), %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT5:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 [[TMP9]] ; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr align 8 [[TMP7]], [[ACTIVE_LANE_MASK]], poison) ; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr align 8 [[TMP10]], [[ACTIVE_LANE_MASK2]], poison) @@ -385,9 +378,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select [[TMP11]], [[TMP21]], [[TMP17]] ; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select [[TMP12]], [[TMP22]], [[TMP18]] ; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = shl nuw i64 [[TMP24]], 1 -; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[TMP23]], i64 [[TMP25]] +; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[TMP23]], i64 [[TMP9]] ; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr align 8 [[TMP23]], [[ACTIVE_LANE_MASK]]) ; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI4]], ptr align 8 [[TMP26]], [[ACTIVE_LANE_MASK2]]) ; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] @@ -464,7 +455,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[N_VEC]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; TFNONE-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] ; TFNONE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8 -; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR5:[0-9]+]] +; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR6:[0-9]+]] ; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] ; TFNONE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8 ; TFNONE-NEXT: 
[[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 @@ -481,7 +472,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFALWAYS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; TFALWAYS-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] ; TFALWAYS-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8 -; TFALWAYS-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR5:[0-9]+]] +; TFALWAYS-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR6:[0-9]+]] ; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] ; TFALWAYS-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8 ; TFALWAYS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 @@ -512,7 +503,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[N_VEC]], %[[VECTOR_BODY]] ] ; TFFALLBACK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] ; TFFALLBACK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8 -; TFFALLBACK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR5:[0-9]+]] +; TFFALLBACK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR6:[0-9]+]] ; TFFALLBACK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] ; TFFALLBACK-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8 ; TFFALLBACK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 @@ -529,7 +520,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] ; TFA_INTERLEAVE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8 -; TFA_INTERLEAVE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR5:[0-9]+]] +; TFA_INTERLEAVE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR6:[0-9]+]] ; TFA_INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] ; TFA_INTERLEAVE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8 ; TFA_INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 @@ -588,7 +579,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[N_VEC]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; TFNONE-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] ; TFNONE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8 -; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR6:[0-9]+]] +; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR7:[0-9]+]] ; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] ; TFNONE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8 ; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 @@ -645,7 +636,8 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { ; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]: ; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP3]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP9]], 2 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 
@llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) @@ -655,17 +647,13 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ splat (i1 true), %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP7]], i64 [[TMP9]] ; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr align 8 [[TMP7]], [[ACTIVE_LANE_MASK]], poison) ; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr align 8 [[TMP10]], [[ACTIVE_LANE_MASK2]], poison) ; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) ; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD3]], [[ACTIVE_LANE_MASK2]]) ; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 1 -; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP15]] +; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP9]] ; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP11]], ptr align 8 [[TMP13]], [[ACTIVE_LANE_MASK]]) ; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP12]], ptr align 8 [[TMP16]], [[ACTIVE_LANE_MASK2]]) ; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] @@ -739,7 +727,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFNONE-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8 ; TFNONE-NEXT: [[MULADD]] = tail call double @llvm.fmuladd.f64(double [[LOAD]], double [[M]], double [[FMA_SUM]]) ; TFNONE-NEXT: [[TOINT:%.*]] = fptoui double [[LOAD]] to i64 -; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[TOINT]]) #[[ATTR3]] +; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[TOINT]]) #[[ATTR4]] ; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] ; TFNONE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8 ; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 @@ -811,7 +799,8 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], double [[M:%.*]]) #[[ATTR0]] { ; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]: ; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP3]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP9]], 2 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) @@ -824,8 
+813,6 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP24:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 1 ; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[TMP7]], i64 [[TMP9]] ; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr align 8 [[TMP7]], [[ACTIVE_LANE_MASK]], poison) ; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr align 8 [[TMP10]], [[ACTIVE_LANE_MASK2]], poison) @@ -836,9 +823,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = call @foo_vector( [[TMP13]], [[ACTIVE_LANE_MASK]]) ; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = call @foo_vector( [[TMP14]], [[ACTIVE_LANE_MASK2]]) ; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 1 -; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP19]] +; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP9]] ; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP15]], ptr align 8 [[TMP17]], [[ACTIVE_LANE_MASK]]) ; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP16]], ptr align 8 [[TMP20]], [[ACTIVE_LANE_MASK2]]) ; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP11]], splat (double -0.000000e+00) @@ -912,7 +897,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFNONE: [[LOOP]]: ; TFNONE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_END:.*]] ] ; TFNONE-NEXT: [[LD:%.*]] = load double, ptr [[P2]], align 8 -; TFNONE-NEXT: [[EXP:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR7:[0-9]+]] +; TFNONE-NEXT: [[EXP:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR8:[0-9]+]] ; TFNONE-NEXT: [[COND1:%.*]] = fcmp ogt double [[EXP]], 0.000000e+00 ; TFNONE-NEXT: br i1 [[COND1]], label %[[LOOP_MIDDLE:.*]], label %[[LOOP_END]] ; TFNONE: [[LOOP_MIDDLE]]: @@ -933,7 +918,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFCOMMON: [[LOOP]]: ; TFCOMMON-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; TFCOMMON-NEXT: [[LD:%.*]] = load double, ptr [[P2]], align 8 -; TFCOMMON-NEXT: [[EXP:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR6:[0-9]+]] +; TFCOMMON-NEXT: [[EXP:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR7:[0-9]+]] ; TFCOMMON-NEXT: [[COND1:%.*]] = fcmp ogt double [[EXP]], 0.000000e+00 ; TFCOMMON-NEXT: [[SINK:%.*]] = select i1 [[COND1]], double 0.000000e+00, double 1.000000e+00 ; TFCOMMON-NEXT: store double [[SINK]], ptr [[P]], align 8 @@ -958,7 +943,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ 
[[ACTIVE_LANE_MASK_NEXT:%.*]], %[[TMP9]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT6:%.*]], %[[TMP9]] ] ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2]], align 8 -; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR6:[0-9]+]] +; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7:[0-9]+]] ; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = fcmp ogt double [[TMP6]], 0.000000e+00 ; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select i1 [[TMP8]], double 0.000000e+00, double 1.000000e+00 ; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = or i1 [[ACTIVE_LANE_MASK]], [[ACTIVE_LANE_MASK2]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll index 3142227815383..a132e81dd14f7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll @@ -12,7 +12,8 @@ define i32 @sudot(ptr %a, ptr %b) #0 { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32 +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP1]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -21,15 +22,11 @@ define i32 @sudot(ptr %a, ptr %b) #0 { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 4 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP6]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP6]], i64 [[TMP1]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 1 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP10]], align 1 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 4 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP13]], i64 [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP13]], i64 [[TMP1]] ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP13]], align 1 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP17]], align 1 ; CHECK-NEXT: [[TMP18:%.*]] = sext [[WIDE_LOAD3]] to @@ -115,7 +112,8 @@ define i32 @usdot(ptr %a, ptr %b) #0 { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32 +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP1]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -124,15 +122,11 @@ define i32 @usdot(ptr %a, ptr %b) #0 { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], 
[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 4 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP6]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP6]], i64 [[TMP1]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 1 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP10]], align 1 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 4 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP13]], i64 [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP13]], i64 [[TMP1]] ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP13]], align 1 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP17]], align 1 ; CHECK-NEXT: [[TMP18:%.*]] = zext [[WIDE_LOAD3]] to diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll index 71eb5476b7ac5..864f105fae20a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll @@ -901,22 +901,19 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-INTERLEAVED: vector.ph: ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP1]], 2 ; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-INTERLEAVED: vector.body: ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 3 -; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP10]], i64 [[TMP13]] +; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP10]], i64 [[TMP1]] ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = zext [[WIDE_LOAD]] to ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 3 -; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP16]], i64 [[TMP19]] +; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP16]], i64 [[TMP1]] ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP20]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = zext [[WIDE_LOAD1]] to ; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = mul [[TMP22]], [[TMP15]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll 
b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll index 44ae1757ce6e6..c53e025595113 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll @@ -294,7 +294,8 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 { ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8 +; DEFAULT-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP3]], 4 +; DEFAULT-NEXT: [[TMP4:%.*]] = mul i64 [[TMP5]], 2 ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i16 [[X]], i64 0 @@ -305,9 +306,7 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 { ; DEFAULT-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[VEC_PHI1:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[TMP13:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 2 -; DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr i16, ptr [[TMP13]], i64 [[TMP17]] +; DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr i16, ptr [[TMP13]], i64 [[TMP5]] ; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP13]], align 2 ; DEFAULT-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP18]], align 2 ; DEFAULT-NEXT: [[TMP19:%.*]] = udiv [[WIDE_LOAD]], [[BROADCAST_SPLAT]] @@ -355,7 +354,8 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 { ; VSCALEFORTUNING2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] ; VSCALEFORTUNING2: [[VECTOR_PH]]: ; VSCALEFORTUNING2-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; VSCALEFORTUNING2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8 +; VSCALEFORTUNING2-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP3]], 4 +; VSCALEFORTUNING2-NEXT: [[TMP4:%.*]] = mul i64 [[TMP7]], 2 ; VSCALEFORTUNING2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; VSCALEFORTUNING2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i16 [[X]], i64 0 @@ -366,9 +366,7 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 { ; VSCALEFORTUNING2-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] ; VSCALEFORTUNING2-NEXT: [[VEC_PHI1:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] ; VSCALEFORTUNING2-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[INDEX]] -; VSCALEFORTUNING2-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; VSCALEFORTUNING2-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2 -; VSCALEFORTUNING2-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[TMP8]], i64 [[TMP11]] +; VSCALEFORTUNING2-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[TMP8]], i64 [[TMP7]] ; VSCALEFORTUNING2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 2 ; VSCALEFORTUNING2-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP12]], align 2 ; VSCALEFORTUNING2-NEXT: [[TMP13:%.*]] = udiv [[WIDE_LOAD]], [[BROADCAST_SPLAT]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll 
b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll index d84463430179d..727dc1cde7357 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll @@ -195,59 +195,56 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-UNORDERED: vector.ph: ; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32 -; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 +; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 3 -; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP6]] -; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 4 -; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP9]] -; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24 -; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP12]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP7]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP10]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[TMP13]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP14]] = fadd [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-UNORDERED-NEXT: [[TMP15]] = fadd [[WIDE_LOAD4]], [[VEC_PHI1]] -; CHECK-UNORDERED-NEXT: [[TMP16]] = fadd [[WIDE_LOAD5]], [[VEC_PHI2]] -; CHECK-UNORDERED-NEXT: [[TMP17]] = fadd [[WIDE_LOAD6]], [[VEC_PHI3]] -; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-UNORDERED-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; 
CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-UNORDERED-NEXT: [[STEP_ADD_2:%.*]] = add nuw nsw i64 [[TMP3]], [[TMP3]] +; CHECK-UNORDERED-NEXT: [[STEP_ADD_3:%.*]] = add nuw nsw i64 [[STEP_ADD_2]], [[TMP3]] +; CHECK-UNORDERED-NEXT: [[VECTOR_POINTER:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP3]] +; CHECK-UNORDERED-NEXT: [[VECTOR_POINTER4:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[STEP_ADD_2]] +; CHECK-UNORDERED-NEXT: [[VECTOR_POINTER5:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[STEP_ADD_3]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[VECTOR_POINTER]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load , ptr [[VECTOR_POINTER4]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD8:%.*]] = load , ptr [[VECTOR_POINTER5]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP6]] = fadd [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-UNORDERED-NEXT: [[TMP7]] = fadd [[WIDE_LOAD6]], [[VEC_PHI1]] +; CHECK-UNORDERED-NEXT: [[TMP8]] = fadd [[WIDE_LOAD7]], [[VEC_PHI2]] +; CHECK-UNORDERED-NEXT: [[TMP9]] = fadd [[WIDE_LOAD8]], [[VEC_PHI3]] +; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] +; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-UNORDERED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-UNORDERED: middle.block: -; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd [[TMP15]], [[TMP14]] -; CHECK-UNORDERED-NEXT: [[BIN_RDX7:%.*]] = fadd [[TMP16]], [[BIN_RDX]] -; CHECK-UNORDERED-NEXT: [[BIN_RDX8:%.*]] = fadd [[TMP17]], [[BIN_RDX7]] -; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, [[BIN_RDX8]]) +; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd [[TMP7]], [[TMP6]] +; CHECK-UNORDERED-NEXT: [[BIN_RDX9:%.*]] = fadd [[TMP8]], [[BIN_RDX]] +; CHECK-UNORDERED-NEXT: [[BIN_RDX10:%.*]] = fadd [[TMP9]], [[BIN_RDX9]] +; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, [[BIN_RDX10]]) ; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-UNORDERED: scalar.ph: ; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] ; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-UNORDERED: for.body: ; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = load 
float, ptr [[ARRAYIDX]], align 4
-; CHECK-UNORDERED-NEXT: [[ADD]] = fadd float [[TMP20]], [[SUM_07]]
+; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-UNORDERED-NEXT: [[ADD]] = fadd float [[TMP12]], [[SUM_07]]
 ; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK-UNORDERED: for.end:
-; CHECK-UNORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ]
+; CHECK-UNORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
 ; CHECK-UNORDERED-NEXT: ret float [[ADD_LCSSA]]
 ;
 ; CHECK-ORDERED-LABEL: define float @fadd_strict_unroll
@@ -259,52 +256,49 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-ORDERED: vector.ph:
 ; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
-; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
+; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]]
 ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-ORDERED: vector.body:
 ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 3
-; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP6]]
-; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 4
-; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP9]]
-; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24
-; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP12]]
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP4]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, ptr [[TMP7]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x float>, ptr [[TMP13]], align 4
-; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[WIDE_LOAD]])
-; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP14]], <vscale x 8 x float> [[WIDE_LOAD1]])
-; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP15]], <vscale x 8 x float> [[WIDE_LOAD2]])
-; CHECK-ORDERED-NEXT: [[TMP17]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP16]], <vscale x 8 x float> [[WIDE_LOAD3]])
-; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
-; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-ORDERED-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-ORDERED-NEXT: [[STEP_ADD_2:%.*]] = add nuw nsw i64 [[TMP3]], [[TMP3]]
+; CHECK-ORDERED-NEXT: [[STEP_ADD_3:%.*]] = add nuw nsw i64 [[STEP_ADD_2]], [[TMP3]]
+; CHECK-ORDERED-NEXT: [[VECTOR_POINTER:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP3]]
+; CHECK-ORDERED-NEXT: [[VECTOR_POINTER1:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[STEP_ADD_2]]
+; CHECK-ORDERED-NEXT: [[VECTOR_POINTER2:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[STEP_ADD_3]]
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP5]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER1]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER2]], align 4
+; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[WIDE_LOAD]])
+; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP6]], <vscale x 8 x float> [[WIDE_LOAD3]])
+; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP7]], <vscale x 8 x float> [[WIDE_LOAD4]])
+; CHECK-ORDERED-NEXT: [[TMP9]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP8]], <vscale x 8 x float> [[WIDE_LOAD5]])
+; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
+; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-ORDERED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK-ORDERED: middle.block:
 ; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK-ORDERED: scalar.ph:
 ; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP17]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
+; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
 ; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK-ORDERED: for.body:
 ; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-ORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
 ; CHECK-ORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
-; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4
-; CHECK-ORDERED-NEXT: [[ADD]] = fadd float [[TMP19]], [[SUM_07]]
+; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-ORDERED-NEXT: [[ADD]] = fadd float [[TMP11]], [[SUM_07]]
 ; CHECK-ORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK-ORDERED: for.end:
-; CHECK-ORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
+; CHECK-ORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
 ; CHECK-ORDERED-NEXT: ret float [[ADD_LCSSA]]
 ;
 ; CHECK-ORDERED-TF-LABEL: define float @fadd_strict_unroll
@@ -313,21 +307,22 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_PH:%.*]]
 ; CHECK-ORDERED-TF: vector.ph:
 ; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 32
-; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 5
-; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]]
-; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[N]], [[TMP3]]
-; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP4]], i64 0
-; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 3
-; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP8]]
-; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 4
-; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP10]]
-; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24
-; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP12]]
+; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
+; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 5
+; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]]
+; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[N]], [[TMP4]]
+; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i64 [[TMP5]], i64 0
+; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 3
+; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP9]]
+; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 4
+; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP11]]
+; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 24
+; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP13]]
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]])
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]])
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]])
@@ -336,53 +331,49 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-ORDERED-TF: vector.body:
 ; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK6:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY3]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT12:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT13:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT14:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 3
-; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP15]]
-; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 4
-; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP18]]
-; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 24
-; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP21]]
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP13]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP16]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP19]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP22]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[WIDE_MASKED_LOAD]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP23]])
-; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[WIDE_MASKED_LOAD9]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP24]], <vscale x 8 x float> [[TMP25]])
-; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[WIDE_MASKED_LOAD10]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP26]], <vscale x 8 x float> [[TMP27]])
-; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP30]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP28]], <vscale x 8 x float> [[TMP29]])
-; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
-; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = shl nuw i64 [[TMP31]], 3
-; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = add i64 [[INDEX]], [[TMP32]]
-; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = shl nuw i64 [[TMP34]], 4
-; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = add i64 [[INDEX]], [[TMP35]]
-; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], 24
-; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = add i64 [[INDEX]], [[TMP38]]
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP6]])
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT12]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP33]], i64 [[TMP6]])
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT13]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP36]], i64 [[TMP6]])
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT14]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP39]], i64 [[TMP6]])
-; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = extractelement <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
-; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = xor i1 [[TMP40]], true
-; CHECK-ORDERED-TF-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK6:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY3]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT14:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT15:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT16:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-ORDERED-TF-NEXT: [[STEP_ADD_2:%.*]] = add nuw nsw i64 [[TMP1]], [[TMP1]]
+; CHECK-ORDERED-TF-NEXT: [[STEP_ADD_3:%.*]] = add nuw nsw i64 [[STEP_ADD_2]], [[TMP1]]
+; CHECK-ORDERED-TF-NEXT: [[VECTOR_POINTER:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP1]]
+; CHECK-ORDERED-TF-NEXT: [[VECTOR_POINTER9:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[STEP_ADD_2]]
+; CHECK-ORDERED-TF-NEXT: [[VECTOR_POINTER10:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[STEP_ADD_3]]
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP14]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[VECTOR_POINTER]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[VECTOR_POINTER9]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[VECTOR_POINTER10]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[WIDE_MASKED_LOAD]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP15]])
+; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP16]], <vscale x 8 x float> [[TMP17]])
+; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[WIDE_MASKED_LOAD12]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP18]], <vscale x 8 x float> [[TMP19]])
+; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[WIDE_MASKED_LOAD13]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP22]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP20]], <vscale x 8 x float> [[TMP21]])
+; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP2]]
+; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = shl nuw i64 [[TMP23]], 3
+; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = add i64 [[INDEX]], [[TMP24]]
+; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = shl nuw i64 [[TMP26]], 4
+; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = add i64 [[INDEX]], [[TMP27]]
+; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = mul nuw i64 [[TMP29]], 24
+; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], [[TMP30]]
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP7]])
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT14]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP25]], i64 [[TMP7]])
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT15]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP28]], i64 [[TMP7]])
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP31]], i64 [[TMP7]])
+; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = extractelement <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
+; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = xor i1 [[TMP32]], true
+; CHECK-ORDERED-TF-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK-ORDERED-TF: middle.block:
 ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]]
 ; CHECK-ORDERED-TF: for.end:
-; CHECK-ORDERED-TF-NEXT: ret float [[TMP30]]
+; CHECK-ORDERED-TF-NEXT: ret float [[TMP22]]
 ;
@@ -1228,75 +1219,66 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-UNORDERED: vector.ph:
 ; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
-; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
+; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]]
 ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-UNORDERED: vector.body:
 ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 3
-; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP6]]
-; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 4
-; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP9]]
-; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24
-; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP12]]
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP4]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP7]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP13]], align 4
-; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 3
-; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP16]]
-; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 4
-; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP19]]
-; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 24
-; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP22]]
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP14]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 8 x float>, ptr [[TMP17]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 8 x float>, ptr [[TMP20]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 8 x float>, ptr [[TMP23]], align 4
-; CHECK-UNORDERED-NEXT: [[TMP24]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD]], <vscale x 8 x float> [[WIDE_LOAD7]], <vscale x 8 x float> [[VEC_PHI]])
-; CHECK-UNORDERED-NEXT: [[TMP25]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD4]], <vscale x 8 x float> [[WIDE_LOAD8]], <vscale x 8 x float> [[VEC_PHI1]])
-; CHECK-UNORDERED-NEXT: [[TMP26]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD5]], <vscale x 8 x float> [[WIDE_LOAD9]], <vscale x 8 x float> [[VEC_PHI2]])
-; CHECK-UNORDERED-NEXT: [[TMP27]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD6]], <vscale x 8 x float> [[WIDE_LOAD10]], <vscale x 8 x float> [[VEC_PHI3]])
-; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
-; CHECK-UNORDERED-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-UNORDERED-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-UNORDERED-NEXT: [[STEP_ADD_2:%.*]] = add nuw nsw i64 [[TMP3]], [[TMP3]]
+; CHECK-UNORDERED-NEXT: [[STEP_ADD_3:%.*]] = add nuw nsw i64 [[STEP_ADD_2]], [[TMP3]]
+; CHECK-UNORDERED-NEXT: [[VECTOR_POINTER:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP3]]
+; CHECK-UNORDERED-NEXT: [[VECTOR_POINTER4:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[STEP_ADD_2]]
+; CHECK-UNORDERED-NEXT: [[VECTOR_POINTER5:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[STEP_ADD_3]]
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP5]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER4]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER5]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-UNORDERED-NEXT: [[VECTOR_POINTER9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP3]]
+; CHECK-UNORDERED-NEXT: [[VECTOR_POINTER10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[STEP_ADD_2]]
+; CHECK-UNORDERED-NEXT: [[VECTOR_POINTER11:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[STEP_ADD_3]]
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD12:%.*]] = load <vscale x 8 x float>, ptr [[TMP6]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD13:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER9]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD14:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER10]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD15:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER11]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP7]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD]], <vscale x 8 x float> [[WIDE_LOAD12]], <vscale x 8 x float> [[VEC_PHI]])
+; CHECK-UNORDERED-NEXT: [[TMP8]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD6]], <vscale x 8 x float> [[WIDE_LOAD13]], <vscale x 8 x float> [[VEC_PHI1]])
+; CHECK-UNORDERED-NEXT: [[TMP9]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD7]], <vscale x 8 x float> [[WIDE_LOAD14]], <vscale x 8 x float> [[VEC_PHI2]])
+; CHECK-UNORDERED-NEXT: [[TMP10]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD8]], <vscale x 8 x float> [[WIDE_LOAD15]], <vscale x 8 x float> [[VEC_PHI3]])
+; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
+; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-UNORDERED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK-UNORDERED: middle.block:
-; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd <vscale x 8 x float> [[TMP25]], [[TMP24]]
-; CHECK-UNORDERED-NEXT: [[BIN_RDX11:%.*]] = fadd <vscale x 8 x float> [[TMP26]], [[BIN_RDX]]
-; CHECK-UNORDERED-NEXT: [[BIN_RDX12:%.*]] = fadd <vscale x 8 x float> [[TMP27]], [[BIN_RDX11]]
-; CHECK-UNORDERED-NEXT: [[TMP29:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[BIN_RDX12]])
+; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd <vscale x 8 x float> [[TMP8]], [[TMP7]]
+; CHECK-UNORDERED-NEXT: [[BIN_RDX16:%.*]] = fadd <vscale x 8 x float> [[TMP9]], [[BIN_RDX]]
+; CHECK-UNORDERED-NEXT: [[BIN_RDX17:%.*]] = fadd <vscale x 8 x float> [[TMP10]], [[BIN_RDX16]]
+; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[BIN_RDX17]])
 ; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK-UNORDERED: scalar.ph:
 ; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP29]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
+; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
 ; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK-UNORDERED: for.body:
 ; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-UNORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
 ; CHECK-UNORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
-; CHECK-UNORDERED-NEXT: [[TMP30:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-UNORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-UNORDERED-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
-; CHECK-UNORDERED-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP30]], float [[TMP31]], float [[SUM_07]])
+; CHECK-UNORDERED-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-UNORDERED-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP30]], float [[TMP31]], float [[SUM_07]])
+; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-UNORDERED-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP13]], float [[TMP14]], float [[SUM_07]])
 ; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
 ; CHECK-UNORDERED: for.end:
-; CHECK-UNORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP29]], [[MIDDLE_BLOCK]] ]
+-; CHECK-UNORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP29]], [[MIDDLE_BLOCK]] ]
++; CHECK-UNORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
 ; CHECK-UNORDERED-NEXT: ret float [[MULADD_LCSSA]]
 ;
 ; CHECK-ORDERED-LABEL: define float @fmuladd_strict
@@ -1308,72 +1290,63 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-ORDERED: vector.ph:
 ; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
-; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
+; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]]
 ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-ORDERED: vector.body:
 ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 3
-; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP6]]
-; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 4
-; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP9]]
-; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24
-; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP12]]
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP4]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, ptr [[TMP7]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x float>, ptr [[TMP13]], align 4
-; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 3
-; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP16]]
-; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 4
-; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP19]]
-; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 24
-; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP22]]
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP14]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP17]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP20]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP23]], align 4
-; CHECK-ORDERED-NEXT: [[TMP24:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]]
-; CHECK-ORDERED-NEXT: [[TMP25:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD1]], [[WIDE_LOAD5]]
-; CHECK-ORDERED-NEXT: [[TMP26:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD2]], [[WIDE_LOAD6]]
-; CHECK-ORDERED-NEXT: [[TMP27:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD3]], [[WIDE_LOAD7]]
-; CHECK-ORDERED-NEXT: [[TMP28:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP24]])
-; CHECK-ORDERED-NEXT: [[TMP29:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP28]], <vscale x 8 x float> [[TMP25]])
-; CHECK-ORDERED-NEXT: [[TMP30:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP29]], <vscale x 8 x float> [[TMP26]])
-; CHECK-ORDERED-NEXT: [[TMP31]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP30]], <vscale x 8 x float> [[TMP27]])
-; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
-; CHECK-ORDERED-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-ORDERED-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-ORDERED-NEXT: [[STEP_ADD_2:%.*]] = add nuw nsw i64 [[TMP3]], [[TMP3]]
+; CHECK-ORDERED-NEXT: [[STEP_ADD_3:%.*]] = add nuw nsw i64 [[STEP_ADD_2]], [[TMP3]]
+; CHECK-ORDERED-NEXT: [[VECTOR_POINTER:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP3]]
+; CHECK-ORDERED-NEXT: [[VECTOR_POINTER1:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[STEP_ADD_2]]
+; CHECK-ORDERED-NEXT: [[VECTOR_POINTER2:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[STEP_ADD_3]]
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP5]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER1]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER2]], align 4
+; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-ORDERED-NEXT: [[VECTOR_POINTER6:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP3]]
+; CHECK-ORDERED-NEXT: [[VECTOR_POINTER7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[STEP_ADD_2]]
+; CHECK-ORDERED-NEXT: [[VECTOR_POINTER8:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[STEP_ADD_3]]
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 8 x float>, ptr [[TMP6]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER6]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD11:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER7]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD12:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER8]], align 4
+; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD]], [[WIDE_LOAD9]]
+; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD3]], [[WIDE_LOAD10]]
+; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD4]], [[WIDE_LOAD11]]
+; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD5]], [[WIDE_LOAD12]]
+; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP7]])
+; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP11]], <vscale x 8 x float> [[TMP8]])
+; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP12]], <vscale x 8 x float> [[TMP9]])
+; CHECK-ORDERED-NEXT: [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP13]], <vscale x 8 x float> [[TMP10]])
+; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
+; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-ORDERED-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK-ORDERED: middle.block:
 ; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK-ORDERED: scalar.ph:
 ; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP31]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
+; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
 ; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK-ORDERED: for.body:
 ; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-ORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
 ; CHECK-ORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
-; CHECK-ORDERED-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-ORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-ORDERED-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
-; CHECK-ORDERED-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP33]], float [[TMP34]], float [[SUM_07]])
+; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-ORDERED-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP16]], float [[TMP17]], float [[SUM_07]])
 ; CHECK-ORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
 ; CHECK-ORDERED: for.end:
-; CHECK-ORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ]
+; CHECK-ORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
 ; CHECK-ORDERED-NEXT: ret float [[MULADD_LCSSA]]
 ;
 ; CHECK-ORDERED-TF-LABEL: define float @fmuladd_strict
@@ -1382,21 +1355,22 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_PH:%.*]]
 ; CHECK-ORDERED-TF: vector.ph:
 ; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 32
-; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 5
-; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]]
-; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[N]], [[TMP3]]
-; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP4]], i64 0
-; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 3
-; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP8]]
-; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 4
-; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP10]]
-; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24
-; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP12]]
+; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
+; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 5
+; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]]
+; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[N]], [[TMP4]]
+; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i64 [[TMP5]], i64 0
+; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 3
+; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP9]]
+; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 4
+; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP11]]
+; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 24
+; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP13]]
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]])
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]])
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]])
@@ -1405,71 +1379,61 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-TF: vector.body:
 ; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK6:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY3]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT16:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT17:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT18:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP44:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 3
-; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP15]]
-; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 4
-; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP18]]
-; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 24
-; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP21]]
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP13]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP16]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP19]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP22]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = shl nuw i64 [[TMP24]], 3
-; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[TMP25]]
-; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = shl nuw i64 [[TMP27]], 4
-; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[TMP28]]
-; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = mul nuw i64 [[TMP30]], 24
-; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[TMP31]]
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP23]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP26]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP29]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP32]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD12]]
-; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD13]]
-; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD10]], [[WIDE_MASKED_LOAD14]]
-; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], [[WIDE_MASKED_LOAD15]]
-; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[TMP33]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP37]])
-; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[TMP34]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP38]], <vscale x 8 x float> [[TMP39]])
-; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[TMP35]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP40]], <vscale x 8 x float> [[TMP41]])
-; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[TMP36]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP44]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP42]], <vscale x 8 x float> [[TMP43]])
-; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
-; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = shl nuw i64 [[TMP45]], 3
-; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = add i64 [[INDEX]], [[TMP46]]
-; CHECK-ORDERED-TF-NEXT: [[TMP48:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = shl nuw i64 [[TMP48]], 4
-; CHECK-ORDERED-TF-NEXT: [[TMP50:%.*]] = add i64 [[INDEX]], [[TMP49]]
-; CHECK-ORDERED-TF-NEXT: [[TMP51:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP52:%.*]] = mul nuw i64 [[TMP51]], 24
-; CHECK-ORDERED-TF-NEXT: [[TMP53:%.*]] = add i64 [[INDEX]], [[TMP52]]
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP6]])
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP47]], i64 [[TMP6]])
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP50]], i64 [[TMP6]])
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP53]], i64 [[TMP6]])
-; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = extractelement <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
-; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = xor i1 [[TMP54]], true
-; CHECK-ORDERED-TF-NEXT: br i1 [[TMP55]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK6:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY3]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT21:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT22:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT23:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-ORDERED-TF-NEXT: [[STEP_ADD_2:%.*]] = add nuw nsw i64 [[TMP1]], [[TMP1]]
+; CHECK-ORDERED-TF-NEXT: [[STEP_ADD_3:%.*]] = add nuw nsw i64 [[STEP_ADD_2]], [[TMP1]]
+; CHECK-ORDERED-TF-NEXT: [[VECTOR_POINTER:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP1]]
+; CHECK-ORDERED-TF-NEXT: [[VECTOR_POINTER9:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[STEP_ADD_2]]
+; CHECK-ORDERED-TF-NEXT: [[VECTOR_POINTER10:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[STEP_ADD_3]]
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP14]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[VECTOR_POINTER]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[VECTOR_POINTER9]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[VECTOR_POINTER10]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-ORDERED-TF-NEXT: [[VECTOR_POINTER14:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[TMP1]]
+; CHECK-ORDERED-TF-NEXT: [[VECTOR_POINTER15:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[STEP_ADD_2]]
+; CHECK-ORDERED-TF-NEXT: [[VECTOR_POINTER16:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[STEP_ADD_3]]
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD17:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP15]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[VECTOR_POINTER14]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD19:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[VECTOR_POINTER15]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD20:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[VECTOR_POINTER16]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD17]]
+; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], [[WIDE_MASKED_LOAD18]]
+; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD12]], [[WIDE_MASKED_LOAD19]]
+; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD13]], [[WIDE_MASKED_LOAD20]]
+; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[TMP16]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP20]])
+; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[TMP17]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP21]], <vscale x 8 x float> [[TMP22]])
+; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[TMP18]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP23]], <vscale x 8 x float> [[TMP24]])
+; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[TMP19]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP27]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP25]], <vscale x 8 x float> [[TMP26]])
+; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP2]]
+; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = shl nuw i64 [[TMP28]], 3
+; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], [[TMP29]]
+; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = shl nuw i64 [[TMP31]], 4
+; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = add i64 [[INDEX]], [[TMP32]]
+; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = mul nuw i64 [[TMP34]], 24
+; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = add i64 [[INDEX]], [[TMP35]]
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP7]])
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT21]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP30]], i64 [[TMP7]])
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT22]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP33]], i64 [[TMP7]])
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT23]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP36]], i64 [[TMP7]])
+; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = extractelement <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
+; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = xor i1 [[TMP37]], true
+; CHECK-ORDERED-TF-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK-ORDERED-TF: middle.block:
 ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]]
 ; CHECK-ORDERED-TF: for.end:
-; CHECK-ORDERED-TF-NEXT: ret float [[TMP44]]
+; CHECK-ORDERED-TF-NEXT: ret float [[TMP27]]
 ;
@@ -1524,75 +1488,66 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-UNORDERED: vector.ph:
 ; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
-; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
+; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]]
 ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-UNORDERED: vector.body:
 ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 3
-; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP6]]
-; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 4
-; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP9]]
-; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24
-; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP12]]
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP4]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP7]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP13]], align 4
-; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 3
-; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP16]]
-; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 4
-; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP19]]
-; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 24
-; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP22]]
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP14]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 8 x float>, ptr [[TMP17]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 8 x float>, ptr [[TMP20]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 8 x float>, ptr [[TMP23]], align 4
-; CHECK-UNORDERED-NEXT: [[TMP24]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD]], <vscale x 8 x float> [[WIDE_LOAD7]], <vscale x 8 x float> [[VEC_PHI]])
-; CHECK-UNORDERED-NEXT: [[TMP25]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD4]], <vscale x 8 x float> [[WIDE_LOAD8]], <vscale x 8 x float> [[VEC_PHI1]])
-; CHECK-UNORDERED-NEXT: [[TMP26]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD5]], <vscale x 8 x float> [[WIDE_LOAD9]], <vscale x 8 x float> [[VEC_PHI2]])
-; CHECK-UNORDERED-NEXT: [[TMP27]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD6]], <vscale x 8 x float> [[WIDE_LOAD10]], <vscale x 8 x float> [[VEC_PHI3]])
-; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
-; CHECK-UNORDERED-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-UNORDERED-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-UNORDERED-NEXT: [[STEP_ADD_2:%.*]] = add nuw nsw i64 [[TMP3]], [[TMP3]]
+; CHECK-UNORDERED-NEXT: [[STEP_ADD_3:%.*]] = add nuw nsw i64 [[STEP_ADD_2]], [[TMP3]]
+; CHECK-UNORDERED-NEXT: [[VECTOR_POINTER:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP3]]
+; CHECK-UNORDERED-NEXT: [[VECTOR_POINTER4:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[STEP_ADD_2]]
+; CHECK-UNORDERED-NEXT: [[VECTOR_POINTER5:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[STEP_ADD_3]]
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP5]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER4]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER5]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-UNORDERED-NEXT: [[VECTOR_POINTER9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP3]]
+; CHECK-UNORDERED-NEXT: [[VECTOR_POINTER10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[STEP_ADD_2]]
+; CHECK-UNORDERED-NEXT: [[VECTOR_POINTER11:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[STEP_ADD_3]]
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD12:%.*]] = load <vscale x 8 x float>, ptr [[TMP6]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD13:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER9]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD14:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER10]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD15:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER11]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP7]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD]], <vscale x 8 x float> [[WIDE_LOAD12]], <vscale x 8 x float> [[VEC_PHI]])
+; CHECK-UNORDERED-NEXT: [[TMP8]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD6]], <vscale x 8 x float> [[WIDE_LOAD13]], <vscale x 8 x float> [[VEC_PHI1]])
+; CHECK-UNORDERED-NEXT: [[TMP9]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD7]], <vscale x 8 x float> [[WIDE_LOAD14]], <vscale x 8 x float> [[VEC_PHI2]])
+; CHECK-UNORDERED-NEXT: [[TMP10]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD8]], <vscale x 8 x float> [[WIDE_LOAD15]], <vscale x 8 x float> [[VEC_PHI3]])
+; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
+; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-UNORDERED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK-UNORDERED: middle.block:
-; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd nnan <vscale x 8 x float> [[TMP25]], [[TMP24]]
-; CHECK-UNORDERED-NEXT: [[BIN_RDX11:%.*]] = fadd nnan <vscale x 8 x float> [[TMP26]], [[BIN_RDX]]
-; CHECK-UNORDERED-NEXT: [[BIN_RDX12:%.*]] = fadd nnan <vscale x 8 x float> [[TMP27]], [[BIN_RDX11]]
-; CHECK-UNORDERED-NEXT: [[TMP29:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[BIN_RDX12]])
+; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd nnan <vscale x 8 x float> [[TMP8]], [[TMP7]]
+; CHECK-UNORDERED-NEXT: [[BIN_RDX16:%.*]] = fadd nnan <vscale x 8 x float> [[TMP9]], [[BIN_RDX]]
+; CHECK-UNORDERED-NEXT: [[BIN_RDX17:%.*]] = fadd nnan <vscale x 8 x float> [[TMP10]], [[BIN_RDX16]]
+; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[BIN_RDX17]])
 ; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK-UNORDERED: scalar.ph:
 ; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP29]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
+; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
 ; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK-UNORDERED: for.body:
 ; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-UNORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
 ; CHECK-UNORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
-; CHECK-UNORDERED-NEXT: [[TMP30:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP30:%.*]] = load float, ptr [[ARRAYIDX]], align 4
++; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-UNORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-UNORDERED-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
-; CHECK-UNORDERED-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP30]], float [[TMP31]], float [[SUM_07]])
+; CHECK-UNORDERED-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+-; CHECK-UNORDERED-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP30]], float [[TMP31]], float [[SUM_07]])
++; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
++; CHECK-UNORDERED-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP13]], float [[TMP14]], float [[SUM_07]])
 ; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
 ; CHECK-UNORDERED: for.end:
-; CHECK-UNORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP29]], [[MIDDLE_BLOCK]] ]
+-; CHECK-UNORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP29]], [[MIDDLE_BLOCK]] ]
++; CHECK-UNORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
 ; CHECK-UNORDERED-NEXT: ret float [[MULADD_LCSSA]]
 ;
 ; CHECK-ORDERED-LABEL: define float @fmuladd_strict_fmf
@@ -1604,72 +1559,63 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-ORDERED: vector.ph:
 ; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
-; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
+; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]]
 ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-ORDERED: vector.body:
 ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 3
-; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP6]]
-; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 4
-; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP9]]
-; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24
-; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP12]]
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP4]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, ptr [[TMP7]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x float>, ptr [[TMP13]], align 4
-; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 3
-; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP16]]
-; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 4
-; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP19]]
-; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 24
-; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP22]]
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP14]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP17]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP20]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP23]], align 4
-; CHECK-ORDERED-NEXT: [[TMP24:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]]
-; CHECK-ORDERED-NEXT: [[TMP25:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD1]], [[WIDE_LOAD5]]
-; CHECK-ORDERED-NEXT: [[TMP26:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD2]], [[WIDE_LOAD6]]
-; CHECK-ORDERED-NEXT: [[TMP27:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD3]], [[WIDE_LOAD7]]
-; CHECK-ORDERED-NEXT: [[TMP28:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP24]])
-; CHECK-ORDERED-NEXT: [[TMP29:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP28]], <vscale x 8 x float> [[TMP25]])
-; CHECK-ORDERED-NEXT: [[TMP30:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP29]], <vscale x 8 x float> [[TMP26]])
-; CHECK-ORDERED-NEXT: [[TMP31]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP30]], <vscale x 8 x float> [[TMP27]])
-; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
-; CHECK-ORDERED-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-ORDERED-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-ORDERED-NEXT: [[STEP_ADD_2:%.*]] = add nuw nsw i64 [[TMP3]], [[TMP3]]
+; CHECK-ORDERED-NEXT: [[STEP_ADD_3:%.*]] = add nuw nsw i64 [[STEP_ADD_2]], [[TMP3]]
+; CHECK-ORDERED-NEXT: [[VECTOR_POINTER:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP3]]
+; CHECK-ORDERED-NEXT: [[VECTOR_POINTER1:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[STEP_ADD_2]]
+; CHECK-ORDERED-NEXT: [[VECTOR_POINTER2:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[STEP_ADD_3]]
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP5]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER1]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER2]], align 4
+; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-ORDERED-NEXT: [[VECTOR_POINTER6:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP3]]
+; CHECK-ORDERED-NEXT: [[VECTOR_POINTER7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[STEP_ADD_2]]
+; CHECK-ORDERED-NEXT: [[VECTOR_POINTER8:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[STEP_ADD_3]]
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 8 x float>, ptr [[TMP6]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER6]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD11:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER7]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD12:%.*]] = load <vscale x 8 x float>, ptr [[VECTOR_POINTER8]], align 4
+; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD]], [[WIDE_LOAD9]]
+; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD3]], [[WIDE_LOAD10]]
+; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD4]], [[WIDE_LOAD11]]
+; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD5]], [[WIDE_LOAD12]]
+; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP7]])
+; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP11]], <vscale x 8 x float> [[TMP8]])
+; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP12]], <vscale x 8 x float> [[TMP9]])
+; CHECK-ORDERED-NEXT: [[TMP14]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP13]], <vscale x 8 x float> [[TMP10]])
+; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
+; CHECK-ORDERED-NEXT: [[TMP15:%.*]]
= icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-ORDERED-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-ORDERED: middle.block: ; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED: scalar.ph: ; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP31]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] ; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED: for.body: ; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-ORDERED-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; CHECK-ORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] -; CHECK-ORDERED-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -; CHECK-ORDERED-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP33]], float [[TMP34]], float [[SUM_07]]) +; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +; CHECK-ORDERED-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP16]], float [[TMP17]], float [[SUM_07]]) ; CHECK-ORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK-ORDERED: for.end: -; CHECK-ORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-NEXT: ret float [[MULADD_LCSSA]] ; ; CHECK-ORDERED-TF-LABEL: define float @fmuladd_strict_fmf @@ -1678,21 +1624,22 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-ORDERED-TF: vector.ph: ; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 32 -; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 5 -; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]] -; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[N]], [[TMP3]] -; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP4]], i64 0 -; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 3 -; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP8]] -; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 4 -; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP10]] -; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; 
-; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24
-; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP12]]
+; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
+; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 5
+; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]]
+; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[N]], [[TMP4]]
+; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i64 [[TMP5]], i64 0
+; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 3
+; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP9]]
+; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 4
+; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP11]]
+; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 24
+; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP13]]
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]])
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]])
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]])
@@ -1701,71 +1648,61 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-TF: vector.body:
 ; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK6:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY3]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT16:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT17:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT18:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP44:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 3
-; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP15]]
-; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 4
-; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP18]]
-; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 24
-; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP21]]
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP13]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP16]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP19]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP22]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = shl nuw i64 [[TMP24]], 3
-; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[TMP25]]
-; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = shl nuw i64 [[TMP27]], 4
-; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[TMP28]]
-; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = mul nuw i64 [[TMP30]], 24
-; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[TMP31]]
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP23]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP26]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP29]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP32]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD12]]
-; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD13]]
-; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD10]], [[WIDE_MASKED_LOAD14]]
-; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], [[WIDE_MASKED_LOAD15]]
-; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[TMP33]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP37]])
-; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[TMP34]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP38]], <vscale x 8 x float> [[TMP39]])
-; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[TMP35]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP40]], <vscale x 8 x float> [[TMP41]])
-; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[TMP36]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP44]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP42]], <vscale x 8 x float> [[TMP43]])
-; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
-; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = shl nuw i64 [[TMP45]], 3
-; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = add i64 [[INDEX]], [[TMP46]]
-; CHECK-ORDERED-TF-NEXT: [[TMP48:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = shl nuw i64 [[TMP48]], 4
-; CHECK-ORDERED-TF-NEXT: [[TMP50:%.*]] = add i64 [[INDEX]], [[TMP49]]
-; CHECK-ORDERED-TF-NEXT: [[TMP51:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP52:%.*]] = mul nuw i64 [[TMP51]], 24
-; CHECK-ORDERED-TF-NEXT: [[TMP53:%.*]] = add i64 [[INDEX]], [[TMP52]]
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP6]])
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP47]], i64 [[TMP6]])
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP50]], i64 [[TMP6]])
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP53]], i64 [[TMP6]])
-; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = extractelement <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
-; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = xor i1 [[TMP54]], true
-; CHECK-ORDERED-TF-NEXT: br i1 [[TMP55]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK6:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY3]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT21:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT22:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT23:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-ORDERED-TF-NEXT: [[STEP_ADD_2:%.*]] = add nuw nsw i64 [[TMP1]], [[TMP1]]
+; CHECK-ORDERED-TF-NEXT: [[STEP_ADD_3:%.*]] = add nuw nsw i64 [[STEP_ADD_2]], [[TMP1]]
+; CHECK-ORDERED-TF-NEXT: [[VECTOR_POINTER:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP1]]
+; CHECK-ORDERED-TF-NEXT: [[VECTOR_POINTER9:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[STEP_ADD_2]]
+; CHECK-ORDERED-TF-NEXT: [[VECTOR_POINTER10:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[STEP_ADD_3]]
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP14]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[VECTOR_POINTER]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[VECTOR_POINTER9]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[VECTOR_POINTER10]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-ORDERED-TF-NEXT: [[VECTOR_POINTER14:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[TMP1]]
+; CHECK-ORDERED-TF-NEXT: [[VECTOR_POINTER15:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[STEP_ADD_2]]
+; CHECK-ORDERED-TF-NEXT: [[VECTOR_POINTER16:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[STEP_ADD_3]]
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD17:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP15]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[VECTOR_POINTER14]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD19:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[VECTOR_POINTER15]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD20:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr align 4 [[VECTOR_POINTER16]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD17]]
+; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], [[WIDE_MASKED_LOAD18]]
+; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD12]], [[WIDE_MASKED_LOAD19]]
+; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD13]], [[WIDE_MASKED_LOAD20]]
+; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[TMP16]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP20]])
+; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[TMP17]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP21]], <vscale x 8 x float> [[TMP22]])
+; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[TMP18]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP23]], <vscale x 8 x float> [[TMP24]])
+; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[TMP19]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP27]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP25]], <vscale x 8 x float> [[TMP26]])
+; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP2]]
+; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = shl nuw i64 [[TMP28]], 3
+; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], [[TMP29]]
+; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = shl nuw i64 [[TMP31]], 4
+; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = add i64 [[INDEX]], [[TMP32]]
+; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = mul nuw i64 [[TMP34]], 24
+; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = add i64 [[INDEX]], [[TMP35]]
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP7]])
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT21]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP30]], i64 [[TMP7]])
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT22]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP33]], i64 [[TMP7]])
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT23]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP36]], i64 [[TMP7]])
+; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = extractelement <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
+; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = xor i1 [[TMP37]], true
+; CHECK-ORDERED-TF-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK-ORDERED-TF: middle.block:
 ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]]
 ; CHECK-ORDERED-TF: for.end:
-; CHECK-ORDERED-TF-NEXT: ret float [[TMP44]]
+; CHECK-ORDERED-TF-NEXT: ret float [[TMP27]]
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll
index c56f8327a48b3..a217347652f2b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll
@@ -19,7 +19,8 @@ define i64 @same_exit_block_pre_inc_use1() #0 {
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 64
+; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP4]], 4
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 510, [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 510, [[N_MOD_VF]]
 ; CHECK-NEXT: [[INDEX_NEXT:%.*]] = add i64 3, [[N_VEC]]
@@ -28,29 +29,19 @@ define i64 @same_exit_block_pre_inc_use1() #0 {
 ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 4
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP27:%.*]] = shl nuw i64 [[TMP11]], 5
+; CHECK-NEXT: [[TMP27:%.*]] = add nuw nsw i64 [[TMP4]], [[TMP4]]
+; CHECK-NEXT: [[TMP15:%.*]] = add nuw nsw i64 [[TMP27]], [[TMP4]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP4]]
 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP27]]
-; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 48
 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP15]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP7]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 16 x i8>, ptr [[TMP10]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 16 x i8>, ptr [[TMP13]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 16 x i8>, ptr [[TMP28]], align 1
 ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 4
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP29]], i64 [[TMP19]]
-; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP22:%.*]] = shl nuw i64 [[TMP21]], 5
-; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[TMP29]], i64 [[TMP22]]
-; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP25:%.*]] = mul nuw i64 [[TMP24]], 48
-; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP29]], i64 [[TMP25]]
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP29]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[TMP29]], i64 [[TMP27]]
+; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP29]], i64 [[TMP15]]
 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 16 x i8>, ptr [[TMP29]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 16 x i8>, ptr [[TMP20]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 16 x i8>, ptr [[TMP23]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
index bf4ab32fbf9e4..226defeeece77 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
@@ -20,16 +20,15 @@ define void @cost_store_i8(ptr %dst) #0 {
 ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; DEFAULT: vector.ph:
 ; DEFAULT-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
+; DEFAULT-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 16
+; DEFAULT-NEXT: [[TMP3:%.*]] = mul i64 [[TMP5]], 2
 ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 101, [[TMP3]]
 ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 101, [[N_MOD_VF]]
 ; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
 ; DEFAULT: vector.body:
 ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
-; DEFAULT-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
-; DEFAULT-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP4]], i64 [[TMP6]]
+; DEFAULT-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP4]], i64 [[TMP5]]
 ; DEFAULT-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP4]], align 1
 ; DEFAULT-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP7]], align 1
 ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/struct-return-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/struct-return-cost.ll
index 9526a848f8eab..c5e0bbd0900b1 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/struct-return-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/struct-return-cost.ll
@@ -79,8 +79,8 @@ define void @struct_return_replicate(ptr noalias %in, ptr noalias writeonly %out
 ; CHECK: [[ENTRY:.*:]]
 ; CHECK: [[VECTOR_PH:.*:]]
 ; CHECK: [[VECTOR_BODY:.*:]]
-; CHECK: [[TMP2:%.*]] = tail call { half, half } @foo(half [[TMP1:%.*]]) #[[ATTR2:[0-9]+]]
-; CHECK: [[TMP4:%.*]] = tail call { half, half } @foo(half [[TMP3:%.*]]) #[[ATTR2]]
+; CHECK: [[TMP3:%.*]] = tail call { half, half } @foo(half [[TMP1:%.*]]) #[[ATTR2:[0-9]+]]
+; CHECK: [[TMP4:%.*]] = tail call { half, half } @foo(half [[TMP2:%.*]]) #[[ATTR2]]
 ; CHECK: [[MIDDLE_BLOCK:.*:]]
 ; CHECK: [[EXIT:.*:]]
 ;
@@ -143,16 +143,13 @@ exit:
 define void @struct_return_scalable(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) #2 {
 ; CHECK-LABEL: define void @struct_return_scalable(
 ; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK: [[VECTOR_PH:.*:]]
-; CHECK: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK: [[VECTOR_PH1:.*:]]
 ; CHECK: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK: [[VECTOR_BODY:.*:]]
 ; CHECK: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK: [[VECTOR_BODY1:.*:]]
 ; CHECK: [[TMP12:%.*]] = call { , } @scalable_vec_masked_foo( [[WIDE_LOAD:%.*]], splat (i1 true))
 ; CHECK: [[TMP13:%.*]] = call { , } @scalable_vec_masked_foo( [[WIDE_LOAD1:%.*]], splat (i1 true))
-; CHECK: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK: [[MIDDLE_BLOCK:.*:]]
 ; CHECK: [[SCALAR_PH:.*:]]
 ; CHECK: [[FOR_BODY:.*:]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll
index b78ada07db1b3..e590855c0bc16 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll
@@ -16,7 +16,8 @@ define i64 @int_reduction_and(ptr noalias nocapture %a, i64 %N) {
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP4]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -25,9 +26,7 @@ define i64 @int_reduction_and(ptr noalias nocapture %a, i64 %N) {
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i64 [ 1, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -1, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 1
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP14]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP4]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP10]], align 8
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 2 x i64>, ptr [[TMP15]], align 8
 ; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> [[WIDE_LOAD]])
@@ -43,7 +42,7 @@ define i64 @int_reduction_and(ptr noalias nocapture %a, i64 %N) {
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK: vec.epilog.iter.check:
 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
 ; CHECK: vec.epilog.ph:
 ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[BIN_RDX]], [[VEC_EPILOG_ITER_CHECK]] ], [ 1, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
@@ -59,7 +58,7 @@ define i64 @int_reduction_and(ptr noalias nocapture %a, i64 %N) {
 ; CHECK-NEXT: [[TMP27]] = and i64 [[VEC_PHI8]], [[TMP26]]
 ; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], 2
 ; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC5]]
-; CHECK-NEXT: br i1 [[TMP28]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK: vec.epilog.middle.block:
 ; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC5]]
 ; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]]
@@ -75,7 +74,7 @@ define i64 @int_reduction_and(ptr noalias nocapture %a, i64 %N) {
 ; CHECK-NEXT: [[AND]] = and i64 [[RDX]], [[L3]]
 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK: for.end:
 ; CHECK-NEXT: [[AND_LCSSA:%.*]] = phi i64 [ [[AND]], [[FOR_BODY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ], [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
 ; CHECK-NEXT: ret i64 [[AND_LCSSA]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll
index 27779d5ceb0ac..48f9483aeba9d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll
@@ -16,7 +16,8 @@ define i64 @int_reduction_add(ptr %a, i64 %N) {
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP4]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -25,9 +26,7 @@ define i64 @int_reduction_add(ptr %a, i64 %N) {
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ insertelement (<vscale x 2 x i64> zeroinitializer, i64 5, i32 0), [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 1
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP14]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP4]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP10]], align 8
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 2 x i64>, ptr [[TMP15]], align 8
 ; CHECK-NEXT: [[TMP16]] = add <vscale x 2 x i64> [[WIDE_LOAD]], [[VEC_PHI]]
@@ -42,7 +41,7 @@ define i64 @int_reduction_add(ptr %a, i64 %N) {
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK: vec.epilog.iter.check:
 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
 ; CHECK: vec.epilog.ph:
 ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP21]], [[VEC_EPILOG_ITER_CHECK]] ], [ 5, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
@@ -58,7 +57,7 @@ define i64 @int_reduction_add(ptr %a, i64 %N) {
 ; CHECK-NEXT: [[TMP26]] = add <2 x i64> [[WIDE_LOAD9]], [[VEC_PHI8]]
 ; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], 2
 ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC5]]
-; CHECK-NEXT: br i1 [[TMP27]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP27]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK: vec.epilog.middle.block:
 ; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP26]])
 ; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC5]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll
index ebc1c1ef1e773..6ec74e4ef5139 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll
@@ -16,7 +16,8 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) {
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP4]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -24,9 +25,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) {
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 2
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[TMP16]]
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[TMP4]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP12]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
 ; CHECK-NEXT: [[TMP18:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[WIDE_LOAD]])
@@ -39,7 +38,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) {
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK: vec.epilog.iter.check:
 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
 ; CHECK: vec.epilog.ph:
 ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP19]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0xFFFFFFFFE0000000, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
@@ -54,7 +53,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) {
 ; CHECK-NEXT: [[TMP24]] = call float @llvm.vector.reduce.fadd.v2f32(float [[VEC_PHI7]], <2 x float> [[WIDE_LOAD8]])
 ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2
 ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC4]]
-; CHECK-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK: vec.epilog.middle.block:
 ; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[N]], [[N_VEC4]]
 ; CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
index 9c8a18725afa4..1ad8462b96bb0 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
@@ -27,16 +27,15 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP5]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[TMP5]]
 ; CHECK-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP4]], align 1
 ; CHECK-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP7]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
@@ -60,16 +59,15 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
 ; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-VF8: vector.ph:
 ; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
+; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul i64 [[TMP5]], 2
 ; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
 ; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF8: vector.body:
 ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF8-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-VF8-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
-; CHECK-VF8-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[TMP6]]
+; CHECK-VF8-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[TMP5]]
 ; CHECK-VF8-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP4]], align 1
 ; CHECK-VF8-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP7]], align 1
 ; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
@@ -119,16 +117,15 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
 ; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP3]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 [[TMP3]]
 ; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP2]], align 1
 ; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP5]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
@@ -151,16 +148,15 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
 ; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-VF8: vector.ph:
 ; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 2
+; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul i64 [[TMP5]], 2
 ; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
 ; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF8: vector.body:
 ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF8-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-VF8-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
-; CHECK-VF8-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 [[TMP6]]
+; CHECK-VF8-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 [[TMP5]]
 ; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP4]], align 1
 ; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP7]], align 1
 ; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
@@ -230,16 +226,15 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP5]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 [[TMP5]]
 ; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP4]], align 1
 ; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP7]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
@@ -282,16 +277,15 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
 ; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-VF8: vector.ph:
 ; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 2
+; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul i64 [[TMP5]], 2
 ; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF8: vector.body:
 ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF8-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-VF8-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
-; CHECK-VF8-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 [[TMP6]]
+; CHECK-VF8-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 [[TMP5]]
 ; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP4]], align 1
 ; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP7]], align 1
 ; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
@@ -351,16 +345,15 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
+; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP4]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 10000, [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 10000, [[N_MOD_VF]]
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 4
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP4]]
 ; CHECK-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[NEXT_GEP]], align 1
 ; CHECK-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP6]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
@@ -402,16 +395,15 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
 ; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-VF8: vector.ph:
 ; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
+; CHECK-VF8-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul i64 [[TMP4]], 2
 ; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 10000, [[TMP3]]
 ; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 10000, [[N_MOD_VF]]
 ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF8: vector.body:
 ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF8-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[INDEX]]
-; CHECK-VF8-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 4
-; CHECK-VF8-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP5]]
+; CHECK-VF8-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP4]]
 ; CHECK-VF8-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[NEXT_GEP]], align 1
 ; CHECK-VF8-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP6]], align 1
 ; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
@@ -473,22 +465,19 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP5]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4]], i64 [[TMP5]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP4]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP10]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP5]]
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP11]], align 4
 ; CHECK-NEXT: [[TMP12:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
@@ -535,22 +524,19 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
 ; CHECK-VF8-NEXT: br label [[VECTOR_PH:%.*]]
 ; CHECK-VF8: vector.ph:
 ; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-VF8-NEXT: [[TMP1:%.*]] = mul i64 [[TMP3]], 2
 ; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP1]]
 ; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF8: vector.body:
 ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF8-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-VF8-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
-; CHECK-VF8-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 [[TMP4]]
+; CHECK-VF8-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 [[TMP3]]
 ; CHECK-VF8-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP2]], align 4
 ; CHECK-VF8-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP5]], align 4
 ; CHECK-VF8-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]]
-; CHECK-VF8-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
-; CHECK-VF8-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP8]]
+; CHECK-VF8-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP3]]
 ; CHECK-VF8-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
 ; CHECK-VF8-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
 ; CHECK-VF8-NEXT: [[TMP10:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
@@ -597,22 +583,19 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
 ; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP3]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP1]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 [[TMP3]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP2]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP5]], align 4
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP3]]
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
 ; CHECK-NEXT: [[TMP10:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
@@ -636,22 +619,19 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
 ; CHECK-VF8-NEXT: br label [[VECTOR_PH:%.*]]
 ; CHECK-VF8: vector.ph:
 ; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-VF8-NEXT: [[TMP1:%.*]] = mul i64 [[TMP3]], 2
 ; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP1]]
 ; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF8: vector.body:
 ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF8-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-VF8-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
-; CHECK-VF8-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 [[TMP4]]
+; CHECK-VF8-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 [[TMP3]]
 ; CHECK-VF8-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP2]], align 4
 ; CHECK-VF8-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP5]], align 4
 ; CHECK-VF8-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]]
-; CHECK-VF8-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
-; CHECK-VF8-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP8]]
+; CHECK-VF8-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP3]]
 ; CHECK-VF8-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
 ; CHECK-VF8-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
 ; CHECK-VF8-NEXT: [[TMP10:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll
index 4706798c525bd..398db69f6327d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll
@@ -36,16 +36,15 @@ define void @main_vf_vscale_x_16(ptr %A, i64 %n) #0 {
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP5]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[TMP5]]
 ; CHECK-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP4]], align 1
 ; CHECK-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP7]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
@@ -90,16 +89,15 @@ define void @main_vf_vscale_x_16(ptr %A, i64 %n) #0 {
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-EPILOG-PREFER-SCALABLE: vector.ph:
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 32
+; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP4]], 16
+; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP7]], 2
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP5]]
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-EPILOG-PREFER-SCALABLE: vector.body:
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 4
-; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[TMP8]]
+; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[TMP7]]
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP6]], align 1
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP9]], align 1
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
@@ -28,24 +28,21 @@ define void @fneg(ptr nocapture noundef writeonly %d, ptr nocapture noundef read
 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 16
+; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP6]], 8
+; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP8]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP7]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds half, ptr [[S]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 3
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds half, ptr [[TMP11]], i64 [[TMP14]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds half, ptr [[TMP11]], i64 [[TMP8]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP11]], align 2
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP15]], align 2
 ; CHECK-NEXT: [[TMP16:%.*]] = fneg [[WIDE_LOAD]]
 ; CHECK-NEXT: [[TMP17:%.*]] = fneg [[WIDE_LOAD3]]
 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds half, ptr [[D]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 3
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds half, ptr [[TMP18]], i64 [[TMP21]]
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds half, ptr [[TMP18]], i64 [[TMP8]]
 ; CHECK-NEXT: store [[TMP16]], ptr [[TMP18]], align 2
 ; CHECK-NEXT: store [[TMP17]], ptr [[TMP22]], align 2
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
index d1b1771ab1532..f964447bb4acc 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
@@ -218,6 +218,7 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
 ; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP2]], 3
 ; CHECK-NEXT: [[DOTNEG:%.*]] = add nsw i64 [[TMP7]], -1
 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
@@ -227,12 +228,11 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[TMP3]]
 ; CHECK-NEXT: [[DOTIDX1:%.*]] = shl i64 [[INDEX]], 3
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[DOTIDX1]]
-; CHECK-NEXT: [[DOTIDX3:%.*]] = shl nuw nsw i64 [[TMP2]], 5
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[B]], i64 [[DOTIDX3]]
-; CHECK-NEXT: [[DOTIDX4:%.*]] = shl i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[DOTIDX4]]
+; CHECK-NEXT: [[DOTIDX3:%.*]] = shl i64 [[TMP8]], 3
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[DOTIDX3]]
 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP10]], align 4
 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8f32( [[WIDE_VEC]])
 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0
@@ -240,9 +240,7 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias
 ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8f32( [[WIDE_VEC1]])
 ; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = extractvalue { , } [[STRIDED_VEC2]], 0
 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP13]], 4
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP12]], i64 [[DOTIDX]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP3]]
 ; CHECK-NEXT: store [[WIDE_MASKED_GATHER]], ptr [[TMP12]], align 4
 ; CHECK-NEXT: store [[WIDE_MASKED_GATHER2]], ptr [[TMP14]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
index 47623f3b5d99e..d8d03a21397fb 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 6
 ; REQUIRES: asserts
 ; RUN: opt -passes=loop-vectorize -S < %s -debug -prefer-predicate-over-epilogue=scalar-epilogue 2>%t | FileCheck %s
 ; RUN: cat %t | FileCheck %s --check-prefix=DEBUG
@@ -9,10 +10,15 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @induction_i7(ptr %dst) #0 {
 ; CHECK-LABEL: define void @induction_i7(
-; CHECK-SAME: ptr [[DST:%.*]])
-; CHECK: vector.ph:
+; CHECK-SAME: ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP4]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP4]], 2
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP2]], 2
 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP40]], i64 0
 ; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2
@@ -26,21 +32,23 @@ define void @induction_i7(ptr %dst) #0 {
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP9]]
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
 ; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP23:%.*]] = zext [[VEC_IND]] to
 ; CHECK-NEXT: [[TMP24:%.*]] = zext [[STEP_ADD]] to
-; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP27:%.*]] = shl nuw i64 [[TMP26]], 1
-; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP27]]
+; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP40]]
 ; CHECK-NEXT: store [[TMP23]], ptr [[TMP21]], align 8
 ; CHECK-NEXT: store [[TMP24]], ptr [[TMP28]], align 8
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[DOTSPLAT]]
 ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP29]], label %middle.block, label %[[VECTOR_BODY]]
+; CHECK-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], [[FOR_END:label %.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
 ;
 entry:
@@ -68,10 +76,15 @@ for.end: ; preds = %for.body
 define void @induction_i3_zext(ptr %dst) #0 {
 ; CHECK-LABEL: define void @induction_i3_zext(
-; CHECK-SAME: ptr [[DST:%.*]])
-; CHECK: vector.ph:
+; CHECK-SAME: ptr [[DST:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP4]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP4]], 2
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP2]], 2
 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP40]], i64 0
 ; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2
@@ -85,21 +98,23 @@ define void @induction_i3_zext(ptr %dst) #0 {
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP9]]
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
 ; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT: [[TMP19:%.*]] = zext [[VEC_IND]] to
 ; CHECK-NEXT: [[TMP20:%.*]] = zext [[STEP_ADD]] to
 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP25:%.*]] = shl nuw i64 [[TMP24]], 1
-; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP25]]
+; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP40]]
 ; CHECK-NEXT: store [[TMP19]], ptr [[TMP21]], align 8
 ; CHECK-NEXT: store [[TMP20]], ptr [[TMP26]], align 8
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[DOTSPLAT]]
 ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP27]], label %middle.block, label %[[VECTOR_BODY]]
+; CHECK-NEXT: br i1 [[TMP27]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], [[FOR_END:label %.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
 ;
 entry:
 br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
index 9312306ce519a..8dc873388802d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
@@ -16,7 +16,8 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) {
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
+; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP6]], 2
+; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP10]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP7]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[N_VEC]], 8
@@ -28,9 +29,7 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) {
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
 ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP34:%.*]] = shl nuw i64 [[TMP33]], 1
-; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i64, ptr [[TMP30]], i64 [[TMP34]]
+; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i64, ptr [[TMP30]], i64 [[TMP10]]
 ; CHECK-NEXT: store zeroinitializer, ptr [[TMP30]], align 8
 ; CHECK-NEXT: store zeroinitializer, ptr [[TMP35]], align 8
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll
index ed49dc5a7573f..70b5c38ecad76 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll
@@ -28,7 +28,8 @@ define void @multiple_exits_unique_exit_block(ptr %A, ptr %B, i32 %N) #0 {
 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 8
+; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP7]], 4
+; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP11]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], [[TMP8]]
 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
 ; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[N_MOD_VF]]
@@ -37,15 +38,12 @@ define void @multiple_exits_unique_exit_block(ptr %A, ptr %B, i32 %N) #0 {
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]]
-; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP23:%.*]] = shl nuw i64 [[TMP22]], 2
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[TMP23]]
+; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[TMP13]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP19]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP24]], align 4
 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[INDEX]]
-; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP29:%.*]] = shl nuw i64 [[TMP28]], 2
-; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[TMP29]]
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[TMP13]]
 ; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP25]], align 4
 ; CHECK-NEXT: store [[WIDE_LOAD3]], ptr [[TMP30]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP8]]
@@ -98,7 +96,8 @@ define i32 @multiple_exits_multiple_exit_blocks(ptr %A, ptr %B, i32 %N) #0 {
 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 8
+; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP7]], 4
+; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP11]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], [[TMP8]]
 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
 ; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[N_MOD_VF]]
@@ -107,15 +106,12 @@ define i32 @multiple_exits_multiple_exit_blocks(ptr %A, ptr %B, i32 %N) #0 {
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]]
-; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP23:%.*]] = shl nuw i64 [[TMP22]], 2
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[TMP23]]
+; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[TMP13]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP19]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP24]], align 4
 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[INDEX]]
-; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP29:%.*]] = shl nuw i64 [[TMP28]], 2
-; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[TMP29]]
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[TMP13]]
 ; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP25]], align 4
 ; CHECK-NEXT: store [[WIDE_LOAD3]], ptr [[TMP30]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP8]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll
index 4c7f70ad4d15e..8b19b103b7286 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll
@@ -43,7 +43,8 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr
 ; CHECK-NEXT: br i1 [[CONFLICT_RDX11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4
+; CHECK-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP15]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP17]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], [[TMP16]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]]
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -51,28 +52,20 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i64, ptr [[SRC_1]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i64, ptr [[SRC_2]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP29:%.*]] = shl nuw i64 [[TMP28]], 1
-; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i64, ptr [[TMP27]], i64 [[TMP29]]
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i64, ptr [[TMP27]], i64 [[TMP17]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP27]], align 8
 ; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load , ptr [[TMP30]], align 8
-; CHECK-NEXT: [[TMP32:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP33:%.*]] = shl nuw i64 [[TMP32]], 1
-; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i64, ptr [[TMP31]], i64 [[TMP33]]
+; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i64, ptr [[TMP31]], i64 [[TMP17]]
 ; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load , ptr [[TMP31]], align 8
 ; CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load , ptr [[TMP34]], align 8
 ; CHECK-NEXT: [[TMP35:%.*]] = add [[WIDE_LOAD]], [[WIDE_LOAD13]]
 ; CHECK-NEXT: [[TMP36:%.*]] = add [[WIDE_LOAD12]], [[WIDE_LOAD14]]
 ; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i64, ptr [[DST_1]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP45:%.*]] = getelementptr i64, ptr [[DST_2]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP42:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP43:%.*]] = shl nuw i64 [[TMP42]], 1
-; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i64, ptr [[TMP41]], i64 [[TMP43]]
+; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i64, ptr [[TMP41]], i64 [[TMP17]]
 ; CHECK-NEXT: store [[TMP35]], ptr [[TMP41]], align 8
 ; CHECK-NEXT: store [[TMP36]], ptr [[TMP44]], align 8
-; CHECK-NEXT: [[TMP46:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP47:%.*]] = shl nuw i64 [[TMP46]], 1
-; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i64, ptr [[TMP45]], i64 [[TMP47]]
+; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i64, ptr [[TMP45]], i64 [[TMP17]]
 ; CHECK-NEXT: store [[TMP35]], ptr [[TMP45]], align 8
 ; CHECK-NEXT: store [[TMP36]], ptr [[TMP48]], align 8
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll
index c8ecb7f864521..7b7915f729432 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll
@@ -11,7 +11,8 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 {
 ; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP62:%.*]] = mul nuw i64 [[TMP61]], 16
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP61]], 4
+; CHECK-NEXT: [[TMP62:%.*]] = mul i64 [[TMP1]], 4
 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]]
@@ -40,14 +41,10 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 {
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT12:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK9:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT13:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX6]]
-; CHECK-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP53:%.*]] = shl nuw i64 [[TMP52]], 2
-; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP53]]
-; CHECK-NEXT: [[TMP55:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP56:%.*]] = shl nuw i64 [[TMP55]], 3
+; CHECK-NEXT: [[TMP56:%.*]] = add nuw nsw i64 [[TMP1]], [[TMP1]]
+; CHECK-NEXT: [[TMP59:%.*]] = add nuw nsw i64 [[TMP56]], [[TMP1]]
+; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP1]]
 ; CHECK-NEXT: [[TMP57:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP56]]
-; CHECK-NEXT: [[TMP58:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP59:%.*]] = mul nuw i64 [[TMP58]], 12
 ; CHECK-NEXT: [[TMP60:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP59]]
 ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr align 4 [[TMP47]], [[ACTIVE_LANE_MASK]])
 ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr align 4 [[TMP54]], [[ACTIVE_LANE_MASK7]])
@@ -97,7 +94,8 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias %
 ; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP5]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP1]], 4
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP3]]
@@ -126,14 +124,10 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias %
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT15:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK9:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT16:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[COND_PTR:%.*]], i64 [[INDEX6]]
-; CHECK-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP53:%.*]] = shl nuw i64 [[TMP52]], 2
-; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP53]]
-; CHECK-NEXT: [[TMP55:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP56:%.*]] = shl nuw i64 [[TMP55]], 3
+; CHECK-NEXT: [[TMP56:%.*]] = add nuw nsw i64 [[TMP1]], [[TMP1]]
+; CHECK-NEXT: [[TMP59:%.*]] = add nuw nsw i64 [[TMP56]], [[TMP1]]
+; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP1]]
 ; CHECK-NEXT: [[TMP57:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP56]]
-; CHECK-NEXT: [[TMP58:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP59:%.*]] = mul nuw i64 [[TMP58]], 12
 ; CHECK-NEXT: [[TMP60:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP59]]
 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP47]], [[ACTIVE_LANE_MASK]], poison)
 ; CHECK-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP54]], [[ACTIVE_LANE_MASK7]], poison)
@@ -148,15 +142,9 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias %
 ; CHECK-NEXT: [[TMP71:%.*]] = select [[ACTIVE_LANE_MASK8]], [[TMP63]], zeroinitializer
 ; CHECK-NEXT: [[TMP72:%.*]] = select [[ACTIVE_LANE_MASK9]], [[TMP64]], zeroinitializer
 ; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX6]]
-; CHECK-NEXT: [[TMP74:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP75:%.*]] = shl nuw i64 [[TMP74]], 2
-; CHECK-NEXT: [[TMP76:%.*]] = getelementptr i32, ptr [[TMP65]], i64 [[TMP75]]
-; CHECK-NEXT: [[TMP77:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP78:%.*]] = shl nuw i64 [[TMP77]], 3
-; CHECK-NEXT: [[TMP79:%.*]] = getelementptr i32, ptr [[TMP65]], i64 [[TMP78]]
-; CHECK-NEXT: [[TMP80:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP81:%.*]] = mul nuw i64 [[TMP80]], 12
-; CHECK-NEXT: [[TMP82:%.*]] = getelementptr i32, ptr [[TMP65]], i64 [[TMP81]]
+; CHECK-NEXT: [[TMP76:%.*]] = getelementptr i32, ptr [[TMP65]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP79:%.*]] = getelementptr i32, ptr [[TMP65]], i64 [[TMP56]]
+; CHECK-NEXT: [[TMP82:%.*]] = getelementptr i32, ptr [[TMP65]], i64 [[TMP59]]
 ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr align 4 [[TMP65]], [[TMP69]])
 ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr align 4 [[TMP76]], [[TMP70]])
 ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr align 4 [[TMP79]], [[TMP71]])
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
index df6431c1ee282..75c4969121915 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
@@ -61,17 +61,14 @@ define void @vscale_mul_8(ptr noalias noundef readonly captures(none) %a, ptr n
 ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[MUL1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP3]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[TMP10]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[TMP2]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[A]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP11]], align 4
-; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 2
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP2]]
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[B]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP16]], align 4
 ; CHECK-NEXT: [[TMP17:%.*]] = fmul [[WIDE_LOAD]], [[WIDE_LOAD2]]
@@ -182,22 +179,19 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n
 ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 31
 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP3]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]]
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i64 [[TMP10]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i64 [[TMP2]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP11]], align 4
 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 2
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP2]]
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP12]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP16]], align 4
 ; CHECK-NEXT: [[TMP17:%.*]] = fmul [[WIDE_LOAD]], [[WIDE_LOAD2]]
@@ -252,22 +246,19 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n
 ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 64
 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP3]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]]
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i64 [[TMP10]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i64 [[TMP2]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP11]], align 4
 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 2
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP2]]
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP12]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP16]], align 4
 ; CHECK-NEXT: [[TMP17:%.*]] = fmul [[WIDE_LOAD]], [[WIDE_LOAD2]]
@@ -328,22 +319,19 @@ define void @trip_count_with_overflow(ptr noalias noundef readonly captures(none
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
+; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP4]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP6]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP6]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP12]], align 4
 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 2
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP16]]
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP6]]
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP13]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP17]], align 4
 ; CHECK-NEXT: [[TMP18:%.*]] = fmul [[WIDE_LOAD]], [[WIDE_LOAD2]]
@@ -402,22 +390,19 @@ define void @trip_count_too_big_for_element_count(ptr noalias noundef readonly c
 ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 32
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP3]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP3]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP12]], align 4
 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 2
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP16]]
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP3]]
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP13]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP17]], align 4
 ; CHECK-NEXT: [[TMP18:%.*]] = fmul [[WIDE_LOAD]], [[WIDE_LOAD2]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll
index 61da142ad376c..719e4a58a410c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll
@@ -42,7 +42,8 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src,
 ; CHECK-UF4-NEXT: br label [[VECTOR_PH1:%.*]]
 ; CHECK-UF4: vector.ph:
 ; CHECK-UF4-NEXT: [[TMP61:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP62:%.*]] = mul nuw i64 [[TMP61]], 64
+; CHECK-UF4-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP61]], 16
+; CHECK-UF4-NEXT: [[TMP62:%.*]] = mul i64 [[TMP1]], 4
 ; CHECK-UF4-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-UF4-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 6
 ; CHECK-UF4-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP3]]
@@ -61,14 +62,10 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src,
 ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi [ [[TMP18]], [[VECTOR_PH1]] ], [ [[TMP57:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi [ [[TMP19]], [[VECTOR_PH1]] ], [ [[TMP58:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-UF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]]
-; CHECK-UF4-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP23:%.*]] = shl nuw i64 [[TMP22]], 4
-; CHECK-UF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i64 [[TMP23]]
-; CHECK-UF4-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP32:%.*]] = shl nuw i64 [[TMP31]], 5
+; CHECK-UF4-NEXT: [[TMP32:%.*]] = add nuw nsw i64 [[TMP1]], [[TMP1]]
+; CHECK-UF4-NEXT: [[TMP29:%.*]] = add nuw nsw i64 [[TMP32]], [[TMP1]]
+; CHECK-UF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i64 [[TMP1]]
 ; CHECK-UF4-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i64 [[TMP32]]
-; CHECK-UF4-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP29:%.*]] = mul nuw i64 [[TMP34]], 48
 ; CHECK-UF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i64 [[TMP29]]
 ; CHECK-UF4-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP20]], [[ACTIVE_LANE_MASK]], poison)
 ; CHECK-UF4-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP24]], [[ACTIVE_LANE_MASK6]], poison)
@@ -79,15 +76,9 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src,
 ; CHECK-UF4-NEXT: [[TMP27:%.*]] = mul [[WIDE_MASKED_LOAD10]], splat (i8 3)
 ; CHECK-UF4-NEXT: [[TMP28:%.*]] = mul [[WIDE_MASKED_LOAD11]], splat (i8 3)
 ; CHECK-UF4-NEXT: [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]]
-; CHECK-UF4-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP38:%.*]] = shl nuw i64 [[TMP37]], 4
-; CHECK-UF4-NEXT: [[TMP39:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[TMP38]]
-; CHECK-UF4-NEXT: [[TMP40:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP41:%.*]] = shl nuw i64 [[TMP40]], 5
-; CHECK-UF4-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[TMP41]]
-; CHECK-UF4-NEXT: [[TMP43:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP44:%.*]] = mul nuw i64 [[TMP43]], 48
-; CHECK-UF4-NEXT: [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[TMP44]]
+; CHECK-UF4-NEXT: [[TMP39:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[TMP1]]
+; CHECK-UF4-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[TMP32]]
+; CHECK-UF4-NEXT: [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[TMP29]]
 ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP25]], ptr align 1 [[TMP35]], [[ACTIVE_LANE_MASK]])
 ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP26]], ptr align 1 [[TMP39]], [[ACTIVE_LANE_MASK6]])
 ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP27]], ptr align 1 [[TMP42]], [[ACTIVE_LANE_MASK7]])
@@ -109,7 +100,8 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src,
 ; CHECK-TF-NEXT: br label [[VECTOR_PH:%.*]]
 ; CHECK-TF: vector.ph:
 ; CHECK-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 32
+; CHECK-TF-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP0]], 16
+; CHECK-TF-NEXT: [[TMP1:%.*]] = mul i64 [[TMP10]], 2
 ; CHECK-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-TF-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 5
 ; CHECK-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]]
@@ -124,17 +116,13 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src,
 ; CHECK-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[TMP8]], [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-TF-NEXT: [[ACTIVE_LANE_MASK1:%.*]] = phi [ [[TMP7]], [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-TF-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]]
-; CHECK-TF-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-TF-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 4
-; CHECK-TF-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[TMP11]]
+; CHECK-TF-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[TMP10]]
 ; CHECK-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP9]], [[ACTIVE_LANE_MASK]], poison)
 ; CHECK-TF-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP12]], [[ACTIVE_LANE_MASK1]], poison)
 ; CHECK-TF-NEXT: [[TMP13:%.*]] = mul [[WIDE_MASKED_LOAD]], splat (i8 3)
 ; CHECK-TF-NEXT: [[TMP14:%.*]] = mul [[WIDE_MASKED_LOAD2]], splat (i8 3)
 ; CHECK-TF-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]]
-; CHECK-TF-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-TF-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 4
-; CHECK-TF-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 [[TMP17]]
+; CHECK-TF-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 [[TMP10]]
 ; CHECK-TF-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP13]], ptr align 1 [[TMP15]], [[ACTIVE_LANE_MASK]])
 ; CHECK-TF-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP14]], ptr align 1 [[TMP18]], [[ACTIVE_LANE_MASK1]])
 ; CHECK-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
@@ -206,7 +194,8 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl
 ; CHECK-UF4-NEXT: br label [[VECTOR_PH:%.*]]
 ; CHECK-UF4: vector.ph:
 ; CHECK-UF4-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-UF4-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 2
+; CHECK-UF4-NEXT: [[TMP3:%.*]] = mul i64 [[TMP1]], 4
 ; CHECK-UF4-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-UF4-NEXT: [[TMP26:%.*]] = shl nuw i64 [[TMP4]], 3
 ; CHECK-UF4-NEXT: [[TMP31:%.*]] = sub i64 [[N]], [[TMP26]]
@@ -225,14 +214,10 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl
 ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi [ [[TMP13]], [[VECTOR_PH]] ], [ [[TMP52:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi [ [[TMP14]], [[VECTOR_PH]] ], [ [[TMP53:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-UF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[SRC]], i64 [[INDEX]]
-; CHECK-UF4-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP28:%.*]] = shl nuw i64 [[TMP27]], 1
-; CHECK-UF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, ptr [[TMP15]], i64 [[TMP28]]
-; CHECK-UF4-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2
+; CHECK-UF4-NEXT: [[TMP21:%.*]] = add nuw nsw i64 [[TMP1]], [[TMP1]]
+; CHECK-UF4-NEXT: [[TMP24:%.*]] = add nuw nsw i64 [[TMP21]], [[TMP1]]
+; CHECK-UF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, ptr [[TMP15]], i64 [[TMP1]]
 ; CHECK-UF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds double, ptr [[TMP15]], i64 [[TMP21]]
-; CHECK-UF4-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP24:%.*]] = mul nuw i64 [[TMP23]], 6
 ; CHECK-UF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, ptr [[TMP15]], i64 [[TMP24]]
 ; CHECK-UF4-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr align 8 [[TMP15]], [[ACTIVE_LANE_MASK]], poison)
 ; CHECK-UF4-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr align 8 [[TMP29]], [[ACTIVE_LANE_MASK6]], poison)
@@ -243,15 +228,9 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl
 ; CHECK-UF4-NEXT: [[TMP18:%.*]] = fmul [[WIDE_MASKED_LOAD10]], splat (double 3.000000e+00)
 ; CHECK-UF4-NEXT: [[TMP19:%.*]] = fmul [[WIDE_MASKED_LOAD11]], splat (double 3.000000e+00)
 ; CHECK-UF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
-; CHECK-UF4-NEXT: [[TMP32:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP33:%.*]] = shl nuw i64 [[TMP32]], 1
-; CHECK-UF4-NEXT: [[TMP34:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP33]]
-; CHECK-UF4-NEXT: [[TMP35:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP36:%.*]] = shl nuw i64 [[TMP35]], 2
-; CHECK-UF4-NEXT: [[TMP37:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP36]]
-; CHECK-UF4-NEXT: [[TMP38:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP39:%.*]] = mul nuw i64 [[TMP38]], 6
-; CHECK-UF4-NEXT: [[TMP40:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP39]]
+; CHECK-UF4-NEXT: [[TMP34:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP1]]
+; CHECK-UF4-NEXT: [[TMP37:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP21]]
+; CHECK-UF4-NEXT: [[TMP40:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP24]]
 ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP16]], ptr align 8 [[TMP30]], [[ACTIVE_LANE_MASK]])
 ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP17]], ptr align 8 [[TMP34]], [[ACTIVE_LANE_MASK6]])
 ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP18]], ptr align 8 [[TMP37]], [[ACTIVE_LANE_MASK7]])
@@ -276,7 +255,8 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl
 ; CHECK-TF-NEXT: br label [[VECTOR_PH:%.*]]
 ; CHECK-TF: vector.ph:
 ; CHECK-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-TF-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP0]], 2
+; CHECK-TF-NEXT: [[TMP1:%.*]] = mul i64 [[TMP10]], 2
 ; CHECK-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-TF-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
 ; CHECK-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]]
@@ -291,17 +271,13 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl
 ; CHECK-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[TMP8]], [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-TF-NEXT: [[ACTIVE_LANE_MASK1:%.*]] = phi [ [[TMP7]], [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-TF-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[SRC]], i64 [[INDEX]]
-; CHECK-TF-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-TF-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 1
-; CHECK-TF-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP11]]
+; CHECK-TF-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP10]]
 ; CHECK-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr align 8 [[TMP9]], [[ACTIVE_LANE_MASK]], poison)
 ; CHECK-TF-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr align 8 [[TMP12]], [[ACTIVE_LANE_MASK1]], poison)
 ; CHECK-TF-NEXT: [[TMP13:%.*]] = fmul [[WIDE_MASKED_LOAD]], splat (double 3.000000e+00)
 ; CHECK-TF-NEXT: [[TMP14:%.*]] = fmul [[WIDE_MASKED_LOAD2]], splat (double 3.000000e+00)
 ; CHECK-TF-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
-; CHECK-TF-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-TF-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 1
-; CHECK-TF-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[TMP15]], i64 [[TMP17]]
+; CHECK-TF-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[TMP15]], i64 [[TMP10]]
 ; CHECK-TF-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP13]], ptr align 8 [[TMP15]], [[ACTIVE_LANE_MASK]])
 ; CHECK-TF-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP14]], ptr align 8 [[TMP18]], [[ACTIVE_LANE_MASK1]])
 ; CHECK-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
index 3b2b0b5c33aa9..3082a49babed4 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
@@ -23,6 +23,7 @@ define void @widen_ptr_phi_unrolled(ptr noalias nocapture %a, ptr noalias nocapt
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP4]], 2
 ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 3
 ; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP5]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNOT]]
@@ -47,17 +48,13 @@ define void @widen_ptr_phi_unrolled(ptr noalias nocapture %a, ptr noalias nocapt
 ; CHECK-NEXT: [[TMP13:%.*]] = add nsw [[TMP9]], splat (i32 1)
 ; CHECK-NEXT: [[TMP14:%.*]] = add nsw [[TMP11]], splat (i32 1)
 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP16]], 4
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP15]], i64 [[DOTIDX]]
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP15]], i64 [[TMP3]]
 ; CHECK-NEXT: store [[TMP13]], ptr [[TMP15]], align 4
 ; CHECK-NEXT: store [[TMP14]], ptr [[TMP17]], align 4
 ; CHECK-NEXT: [[TMP18:%.*]] = add nsw [[TMP10]], splat (i32 1)
 ; CHECK-NEXT: [[TMP19:%.*]] = add nsw [[TMP12]], splat (i32 1)
 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[DOTIDX5:%.*]] = shl nuw nsw i64 [[TMP21]], 4
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP20]], i64 [[DOTIDX5]]
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP20]], i64 [[TMP3]]
 ; CHECK-NEXT: store [[TMP18]], ptr [[TMP20]], align 4
 ; CHECK-NEXT: store [[TMP19]], ptr [[TMP22]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
@@ -134,6 +131,7 @@ define void @widen_2ptrs_phi_unrolled(ptr noalias nocapture %dst, ptr noalias no
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP5]], 2
 ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 3
 ; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP6]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNOT]]
@@ -148,16 +146,12 @@ define void @widen_2ptrs_phi_unrolled(ptr noalias nocapture %dst, ptr noalias no
 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
 ; CHECK-NEXT: [[OFFSET_IDX4:%.*]] = shl i64 [[INDEX]], 2
 ; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX4]]
-; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP7]], 4
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[DOTIDX]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 [[TMP7]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[NEXT_GEP]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[TMP8]], align 4
 ; CHECK-NEXT: [[TMP9:%.*]] = shl nsw [[WIDE_LOAD]], splat (i32 1)
 ; CHECK-NEXT: [[TMP10:%.*]] = shl nsw [[WIDE_LOAD6]], splat (i32 1)
-; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[DOTIDX7:%.*]] = shl nuw nsw i64 [[TMP11]], 4
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP5]], i64 [[DOTIDX7]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[NEXT_GEP5]], i64 [[TMP7]]
 ; CHECK-NEXT: store [[TMP9]], ptr [[NEXT_GEP5]], align 4
 ; CHECK-NEXT: store [[TMP10]], ptr [[TMP12]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll
index 871d9be609bd7..5984bedcf0432 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll
@@ -388,8 +388,7 @@ define void @simple_histogram_user_interleave(ptr noalias %buckets, ptr readonly
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP15]], 4
+; CHECK-NEXT: [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP4]], 4
 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i64 [[DOTIDX]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP17]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll b/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll
index dcb890670e33b..3716122f28caf 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll
@@ -36,41 +36,38 @@ define void @test_uniform(ptr noalias %dst, ptr readonly %src, i64 %uniform , i6
 ; INTERLEAVE-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i64 [[UNIFORM:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
 ; INTERLEAVE-NEXT: entry:
 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; INTERLEAVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
-; INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; INTERLEAVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
-; INTERLEAVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP3]])
-; INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
+; INTERLEAVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
+; INTERLEAVE-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0]], 2
+; INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; INTERLEAVE-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
+; INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP4]])
+; INTERLEAVE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
+; INTERLEAVE-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 1
 ; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
-; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 [[N]])
+; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 [[N]])
 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
 ; INTERLEAVE: vector.body:
 ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], [[VECTOR_BODY]] ]
-; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
-; INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; INTERLEAVE-NEXT: [[DOTIDX:%.*]] = shl i64 [[TMP8]], 4
-; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP7]], i64 [[DOTIDX]]
-; INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr align 8 [[TMP7]], [[ACTIVE_LANE_MASK]], poison)
+; INTERLEAVE-NEXT: [[TMP8:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
+; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr double, ptr [[TMP8]], i64 [[TMP1]]
+; INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr align 8 [[TMP8]], [[ACTIVE_LANE_MASK]], poison)
 ; INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr align 8 [[TMP9]], [[ACTIVE_LANE_MASK2]], poison)
 ; INTERLEAVE-NEXT: [[TMP10:%.*]] = call @foo_uniform( [[WIDE_MASKED_LOAD]], i64 [[UNIFORM]], [[ACTIVE_LANE_MASK]])
 ; INTERLEAVE-NEXT: [[TMP11:%.*]] = call @foo_uniform( [[WIDE_MASKED_LOAD3]], i64 [[UNIFORM]], [[ACTIVE_LANE_MASK2]])
 ; INTERLEAVE-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
-; INTERLEAVE-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; INTERLEAVE-NEXT: [[DOTIDX5:%.*]] = shl i64 [[TMP13]], 4
-; INTERLEAVE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[DOTIDX5]]
+; INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, ptr [[TMP12]], i64 [[TMP1]]
 ; INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP10]], ptr align 8 [[TMP12]], [[ACTIVE_LANE_MASK]])
-; INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP11]], ptr align 8 [[TMP14]], [[ACTIVE_LANE_MASK2]])
-; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
-; INTERLEAVE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; INTERLEAVE-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 1
-; INTERLEAVE-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], [[TMP16]]
-; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP4]])
-; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP17]], i64 [[TMP4]])
-; INTERLEAVE-NEXT: [[TMP18:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0
-; INTERLEAVE-NEXT: br i1 [[TMP18]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP11]], ptr align 8 [[TMP13]], [[ACTIVE_LANE_MASK2]])
+; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP2]]
+; INTERLEAVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
+; INTERLEAVE-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 1
+; INTERLEAVE-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], [[TMP15]]
+; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP5]])
+; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP16]], i64 [[TMP5]])
+; INTERLEAVE-NEXT: [[TMP17:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0
+; INTERLEAVE-NEXT: br i1 [[TMP17]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
 ; INTERLEAVE: for.cond.cleanup:
 ; INTERLEAVE-NEXT: ret void
 ;
@@ -122,41 +119,38 @@ define void @test_uniform_smaller_scalar(ptr noalias %dst, ptr readonly %src, i3
 ; INTERLEAVE-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i32 [[UNIFORM:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; INTERLEAVE-NEXT: entry:
 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; INTERLEAVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
-; INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; INTERLEAVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
-; INTERLEAVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP3]])
-; INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
+; INTERLEAVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
+; INTERLEAVE-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0]], 2
+; INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; INTERLEAVE-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
+; INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP4]])
+; INTERLEAVE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
+; INTERLEAVE-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 1
 ; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
-; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 [[N]])
+; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 [[N]])
 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
[[VECTOR_BODY:%.*]] ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-NEXT: [[DOTIDX:%.*]] = shl i64 [[TMP8]], 4 -; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP7]], i64 [[DOTIDX]] -; INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr align 8 [[TMP7]], [[ACTIVE_LANE_MASK]], poison) +; INTERLEAVE-NEXT: [[TMP8:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]] +; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr double, ptr [[TMP8]], i64 [[TMP1]] +; INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr align 8 [[TMP8]], [[ACTIVE_LANE_MASK]], poison) ; INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr align 8 [[TMP9]], [[ACTIVE_LANE_MASK2]], poison) ; INTERLEAVE-NEXT: [[TMP10:%.*]] = call @bar_uniform( [[WIDE_MASKED_LOAD]], i32 [[UNIFORM]], [[ACTIVE_LANE_MASK]]) ; INTERLEAVE-NEXT: [[TMP11:%.*]] = call @bar_uniform( [[WIDE_MASKED_LOAD3]], i32 [[UNIFORM]], [[ACTIVE_LANE_MASK2]]) ; INTERLEAVE-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-NEXT: [[DOTIDX5:%.*]] = shl i64 [[TMP13]], 4 -; INTERLEAVE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[DOTIDX5]] +; INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, ptr [[TMP12]], i64 [[TMP1]] ; INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP10]], ptr align 8 [[TMP12]], [[ACTIVE_LANE_MASK]]) -; INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP11]], ptr align 8 [[TMP14]], [[ACTIVE_LANE_MASK2]]) -; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] -; INTERLEAVE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 1 -; INTERLEAVE-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], [[TMP16]] -; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP4]]) -; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP17]], i64 [[TMP4]]) -; INTERLEAVE-NEXT: [[TMP18:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 -; INTERLEAVE-NEXT: br i1 [[TMP18]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP3:![0-9]+]] +; INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP11]], ptr align 8 [[TMP13]], [[ACTIVE_LANE_MASK2]]) +; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP2]] +; INTERLEAVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; INTERLEAVE-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 1 +; INTERLEAVE-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], [[TMP15]] +; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP5]]) +; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP16]], i64 [[TMP5]]) +; INTERLEAVE-NEXT: [[TMP17:%.*]] = extractelement 
[[ACTIVE_LANE_MASK_NEXT]], i64 0 +; INTERLEAVE-NEXT: br i1 [[TMP17]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP3:![0-9]+]] ; INTERLEAVE: for.cond.cleanup: ; INTERLEAVE-NEXT: ret void ; @@ -188,7 +182,7 @@ define void @test_uniform_not_invariant(ptr noalias %dst, ptr readonly %src, i64 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[GEPSRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[DATA:%.*]] = load double, ptr [[GEPSRC]], align 8 -; CHECK-NEXT: [[CALL:%.*]] = call double @foo(double [[DATA]], i64 [[INDVARS_IV]]) #[[ATTR4:[0-9]+]] +; CHECK-NEXT: [[CALL:%.*]] = call double @foo(double [[DATA]], i64 [[INDVARS_IV]]) #[[ATTR5:[0-9]+]] ; CHECK-NEXT: [[GEPDST:%.*]] = getelementptr inbounds nuw double, ptr [[DST]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store double [[CALL]], ptr [[GEPDST]], align 8 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 @@ -212,7 +206,7 @@ define void @test_uniform_not_invariant(ptr noalias %dst, ptr readonly %src, i64 ; INTERLEAVE: pred.store.if: ; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]] ; INTERLEAVE-NEXT: [[TMP2:%.*]] = load double, ptr [[TMP1]], align 8 -; INTERLEAVE-NEXT: [[TMP3:%.*]] = call double @foo(double [[TMP2]], i64 [[INDEX]]) #[[ATTR4:[0-9]+]] +; INTERLEAVE-NEXT: [[TMP3:%.*]] = call double @foo(double [[TMP2]], i64 [[INDEX]]) #[[ATTR5:[0-9]+]] ; INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]] ; INTERLEAVE-NEXT: store double [[TMP3]], ptr [[TMP4]], align 8 ; INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE]] @@ -222,7 +216,7 @@ define void @test_uniform_not_invariant(ptr noalias %dst, ptr readonly %src, i64 ; INTERLEAVE-NEXT: [[TMP5:%.*]] = or disjoint i64 [[INDEX]], 1 ; INTERLEAVE-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP5]] ; INTERLEAVE-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8 -; INTERLEAVE-NEXT: [[TMP8:%.*]] = call double @foo(double [[TMP7]], i64 [[TMP5]]) #[[ATTR4]] +; INTERLEAVE-NEXT: [[TMP8:%.*]] = call double @foo(double [[TMP7]], i64 [[TMP5]]) #[[ATTR5]] ; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[TMP5]] ; INTERLEAVE-NEXT: store double [[TMP8]], ptr [[TMP9]], align 8 ; INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE4]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll index 6df3f1b418eb6..33a6d1dc0df80 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll @@ -26,10 +26,10 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) "target-features"="+neon,+do ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[ACC:%.+]]> = phi vp<[[RDX_START]]>, vp<[[REDUCE:%.+]]> (VF scaled by 1/4) ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<[[STEPS]]> -; CHECK-NEXT: vp<[[PTR_A:%.+]]> = vector-pointer ir<%gep.a> +; CHECK-NEXT: vp<[[PTR_A:%.+]]> = vector-pointer ir<%gep.a>, ir<0> ; CHECK-NEXT: WIDEN ir<%load.a> = load vp<[[PTR_A]]> ; CHECK-NEXT: CLONE ir<%gep.b> = getelementptr ir<%b>, vp<[[STEPS]]> -; CHECK-NEXT: vp<[[PTR_B:%.+]]> = vector-pointer ir<%gep.b> +; CHECK-NEXT: vp<[[PTR_B:%.+]]> = vector-pointer ir<%gep.b>, ir<0> ; CHECK-NEXT: WIDEN ir<%load.b> = load vp<[[PTR_B]]> ; CHECK-NEXT: EXPRESSION 
 ; CHECK-NEXT: EXPRESSION vp<[[REDUCE]]> = ir<[[ACC]]> + partial.reduce.add (mul (ir<%load.b> zext to i32), (ir<%load.a> zext to i32))
 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
@@ -83,9 +83,11 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) "target-features"="+neon,+do
 ; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
 ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi vp<[[RDX_START]]>, ir<%add> (VF scaled by 1/4)
 ; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<%index>
-; CHECK-NEXT: WIDEN ir<%load.a> = load ir<%gep.a>
+; CHECK-NEXT: vp<%2> = vector-pointer ir<%gep.a>, ir<0>
+; CHECK-NEXT: WIDEN ir<%load.a> = load vp<%2>
 ; CHECK-NEXT: CLONE ir<%gep.b> = getelementptr ir<%b>, vp<%index>
-; CHECK-NEXT: WIDEN ir<%load.b> = load ir<%gep.b>
+; CHECK-NEXT: vp<%3> = vector-pointer ir<%gep.b>, ir<0>
+; CHECK-NEXT: WIDEN ir<%load.b> = load vp<%3>
 ; CHECK-NEXT: WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32
 ; CHECK-NEXT: WIDEN-CAST ir<%ext.a> = zext ir<%load.a> to i32
 ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a>
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll
index 478c9c1141949..615852f960bd4 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll
@@ -175,14 +175,14 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) {
 ; CHECK-NEXT: [[CMP_N33:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC25]]
 ; CHECK-NEXT: br i1 [[CMP_N33]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
 ; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL46:%.*]] = phi i64 [ [[N_VEC25]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX47:%.*]] = phi i64 [ [[TMP55]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP52]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL48:%.*]] = phi ptr [ [[TMP56]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL56:%.*]] = phi i64 [ [[N_VEC25]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX57:%.*]] = phi i64 [ [[TMP55]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP52]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL58:%.*]] = phi ptr [ [[TMP56]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
 ; CHECK-NEXT: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL46]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX47]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL48]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL56]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX57]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL58]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ]
 ; CHECK-NEXT: [[TMP53:%.*]] = load i8, ptr [[PTR_IV]], align 1
 ; CHECK-NEXT: [[CONV3:%.*]] = zext i8 [[TMP53]] to i64
 ; CHECK-NEXT: [[MUL:%.*]] = shl i64 [[IV]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
index dc9c154b3fe05..3cabe1a7627b0 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
@@ -96,11 +96,11 @@ define void @test(ptr %arr, i32 %len) {
 ; CHECK-NEXT: br i1 [[CMP_N28]], label %[[FOR_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
 ; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC23]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX29:%.*]] = phi double [ [[TMP28]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP21]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0.000000e+00, %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX35:%.*]] = phi double [ [[TMP28]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP21]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0.000000e+00, %[[ITER_CHECK]] ]
 ; CHECK-NEXT: br label %[[FOR_BODY:.*]]
 ; CHECK: [[FOR_BODY]]:
 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
-; CHECK-NEXT: [[REDX_05:%.*]] = phi double [ [[BC_MERGE_RDX29]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[REDX_05:%.*]] = phi double [ [[BC_MERGE_RDX35]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[ARR]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT: [[TMP29:%.*]] = load double, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT: [[ADD]] = fadd fast double [[TMP29]], [[REDX_05]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index 414e5d9295554..34201fffc0db2 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -375,16 +375,15 @@ define void @single_stride_int_scaled(ptr %p, i64 %stride) {
 ; NOSTRIDED-UF2-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; NOSTRIDED-UF2: vector.ph:
 ; NOSTRIDED-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4
+; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = mul i64 [[TMP5]], 2
 ; NOSTRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
 ; NOSTRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; NOSTRIDED-UF2-NEXT: br label [[VECTOR_BODY:%.*]]
 ; NOSTRIDED-UF2: vector.body:
 ; NOSTRIDED-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; NOSTRIDED-UF2-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[INDEX]]
-; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-UF2-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
-; NOSTRIDED-UF2-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP6]]
+; NOSTRIDED-UF2-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP5]]
 ; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP4]], align 4
 ; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
 ; NOSTRIDED-UF2-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
@@ -498,16 +497,15 @@ define void @single_stride_int_iv(ptr %p, i64 %stride) {
 ; NOSTRIDED-UF2-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; NOSTRIDED-UF2: vector.ph:
 ; NOSTRIDED-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4
+; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = mul i64 [[TMP5]], 2
 ; NOSTRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
 ; NOSTRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; NOSTRIDED-UF2-NEXT: br label [[VECTOR_BODY:%.*]]
 ; NOSTRIDED-UF2: vector.body:
 ; NOSTRIDED-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; NOSTRIDED-UF2-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[INDEX]]
-; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-UF2-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
-; NOSTRIDED-UF2-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP6]]
+; NOSTRIDED-UF2-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP5]]
 ; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP4]], align 4
 ; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
 ; NOSTRIDED-UF2-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
@@ -681,24 +679,21 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
 ; NOSTRIDED-UF2-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; NOSTRIDED-UF2: vector.ph:
 ; NOSTRIDED-UF2-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-UF2-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 8
+; NOSTRIDED-UF2-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP6]], 4
+; NOSTRIDED-UF2-NEXT: [[TMP7:%.*]] = mul i64 [[TMP9]], 2
 ; NOSTRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP7]]
 ; NOSTRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; NOSTRIDED-UF2-NEXT: br label [[VECTOR_BODY:%.*]]
 ; NOSTRIDED-UF2: vector.body:
 ; NOSTRIDED-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; NOSTRIDED-UF2-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[P]], i64 [[INDEX]]
-; NOSTRIDED-UF2-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-UF2-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2
-; NOSTRIDED-UF2-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP8]], i64 [[TMP10]]
+; NOSTRIDED-UF2-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP8]], i64 [[TMP9]]
 ; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
 ; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 4 x i32>, ptr [[TMP11]], align 4
 ; NOSTRIDED-UF2-NEXT: [[TMP12:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
 ; NOSTRIDED-UF2-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD4]], splat (i32 1)
 ; NOSTRIDED-UF2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[P2]], i64 [[INDEX]]
-; NOSTRIDED-UF2-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-UF2-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 2
-; NOSTRIDED-UF2-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP14]], i64 [[TMP16]]
+; NOSTRIDED-UF2-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP14]], i64 [[TMP9]]
 ; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP12]], ptr [[TMP14]], align 4
 ; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP13]], ptr [[TMP17]], align 4
 ; NOSTRIDED-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
@@ -996,16 +991,15 @@ define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) {
 ; NOSTRIDED-UF2-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; NOSTRIDED-UF2: vector.ph:
 ; NOSTRIDED-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4
+; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = mul i64 [[TMP5]], 2
 ; NOSTRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
 ; NOSTRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; NOSTRIDED-UF2-NEXT: br label [[VECTOR_BODY:%.*]]
 ; NOSTRIDED-UF2: vector.body:
 ; NOSTRIDED-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; NOSTRIDED-UF2-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[INDEX]]
-; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-UF2-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
-; NOSTRIDED-UF2-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP6]]
+; NOSTRIDED-UF2-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP5]]
 ; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP4]], align 4
 ; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
 ; NOSTRIDED-UF2-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
@@ -1351,9 +1345,7 @@ define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) {
 ; NOSTRIDED-UF2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> poison)
 ; NOSTRIDED-UF2-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> poison)
 ; NOSTRIDED-UF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i64, ptr [[OUT:%.*]], i64 [[INDEX]]
-; NOSTRIDED-UF2-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-UF2-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 1
-; NOSTRIDED-UF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i64, ptr [[TMP9]], i64 [[TMP11]]
+; NOSTRIDED-UF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i64, ptr [[TMP9]], i64 [[TMP3]]
 ; NOSTRIDED-UF2-NEXT: store <vscale x 2 x i64> [[WIDE_MASKED_GATHER]], ptr [[TMP9]], align 8
 ; NOSTRIDED-UF2-NEXT: store <vscale x 2 x i64> [[WIDE_MASKED_GATHER1]], ptr [[TMP12]], align 8
 ; NOSTRIDED-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
@@ -1435,9 +1427,7 @@ define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) {
 ; STRIDED-UF2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> poison)
 ; STRIDED-UF2-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> poison)
 ; STRIDED-UF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i64, ptr [[OUT:%.*]], i64 [[INDEX]]
-; STRIDED-UF2-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; STRIDED-UF2-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 1
-; STRIDED-UF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i64, ptr [[TMP9]], i64 [[TMP11]]
+; STRIDED-UF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i64, ptr [[TMP9]], i64 [[TMP3]]
 ; STRIDED-UF2-NEXT: store <vscale x 2 x i64> [[WIDE_MASKED_GATHER]], ptr [[TMP9]], align 8
 ; STRIDED-UF2-NEXT: store <vscale x 2 x i64> [[WIDE_MASKED_GATHER1]], ptr [[TMP12]], align 8
 ; STRIDED-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
index 642cd467c839f..8a0e315b75864 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
@@ -44,7 +44,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-OUTLOOP-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
 ; IF-EVL-OUTLOOP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
 ; IF-EVL-OUTLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; IF-EVL-OUTLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer inbounds ir<[[GEP1]]>
+; IF-EVL-OUTLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer inbounds ir<[[GEP1]]>, ir<0>
 ; IF-EVL-OUTLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
 ; IF-EVL-OUTLOOP-NEXT: WIDEN ir<[[ADD:%.+]]> = add ir<[[LD1]]>, ir<[[RDX_PHI]]>
 ; IF-EVL-OUTLOOP-NEXT: WIDEN-INTRINSIC vp<[[RDX_SELECT]]> = call llvm.vp.merge(ir<true>, ir<[[ADD]]>, ir<[[RDX_PHI]]>, vp<[[EVL]]>)
@@ -84,7 +84,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-INLOOP-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
 ; IF-EVL-INLOOP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
 ; IF-EVL-INLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; IF-EVL-INLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer inbounds ir<[[GEP1]]>
+; IF-EVL-INLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer inbounds ir<[[GEP1]]>, ir<0>
 ; IF-EVL-INLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
 ; IF-EVL-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + vp.reduce.add (ir<[[LD1]]>, vp<[[EVL]]>)
 ; IF-EVL-INLOOP-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
@@ -121,7 +121,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
 ; NO-VP-OUTLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi vp<[[RDX_START]]>, ir<[[RDX_NEXT:%.+]]>
 ; NO-VP-OUTLOOP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>, vp<[[VF]]>
 ; NO-VP-OUTLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; NO-VP-OUTLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer inbounds ir<[[GEP1]]>
+; NO-VP-OUTLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer inbounds ir<[[GEP1]]>, ir<0>
 ; NO-VP-OUTLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = load vp<[[PTR1]]>
 ; NO-VP-OUTLOOP-NEXT: WIDEN ir<[[ADD:%.+]]> = add ir<[[LD1]]>, ir<[[RDX_PHI]]>
 ; NO-VP-OUTLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]>
@@ -169,7 +169,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
 ; NO-VP-INLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi vp<[[RDX_START]]>, ir<[[RDX_NEXT:%.+]]>
 ; NO-VP-INLOOP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>, vp<[[VF]]>
 ; NO-VP-INLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; NO-VP-INLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer inbounds ir<[[GEP1]]>
+; NO-VP-INLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer inbounds ir<[[GEP1]]>, ir<0>
 ; NO-VP-INLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = load vp<[[PTR1]]>
 ; NO-VP-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + reduce.add (ir<[[LD1]]>)
 ; NO-VP-INLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
index 3ab090ee46ab4..5890e78c357cc 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
@@ -28,14 +28,14 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
 ; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
 ; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer inbounds ir<[[GEP1]]>
+; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer inbounds ir<[[GEP1]]>, ir<0>
 ; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer inbounds ir<[[GEP2]]>
+; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer inbounds ir<[[GEP2]]>, ir<0>
 ; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
 ; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = add nsw ir<[[LD2]]>, ir<[[LD1]]>
 ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer inbounds ir<[[GEP3]]>
+; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer inbounds ir<[[GEP3]]>, ir<0>
 ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]>
 ; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
@@ -59,14 +59,14 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; NO-VP-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
 ; NO-VP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>, vp<[[VF]]>
 ; NO-VP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
-; NO-VP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer inbounds ir<[[GEP1]]>
+; NO-VP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer inbounds ir<[[GEP1]]>, ir<0>
 ; NO-VP-NEXT: WIDEN ir<[[LD1:%.+]]> = load vp<[[PTR1]]>
 ; NO-VP-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
-; NO-VP-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer inbounds ir<[[GEP2]]>
+; NO-VP-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer inbounds ir<[[GEP2]]>, ir<0>
 ; NO-VP-NEXT: WIDEN ir<[[LD2:%.+]]> = load vp<[[PTR2]]>
 ; NO-VP-NEXT: WIDEN ir<[[ADD:%.+]]> = add nsw ir<[[LD2]]>, ir<[[LD1]]>
 ; NO-VP-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; NO-VP-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer inbounds ir<[[GEP3]]>
+; NO-VP-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer inbounds ir<[[GEP3]]>, ir<0>
 ; NO-VP-NEXT: WIDEN store vp<[[PTR3]]>, ir<[[ADD]]>
 ; NO-VP-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]>
 ; NO-VP-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]>
@@ -108,11 +108,11 @@ define void @safe_dep(ptr %p) {
 ; CHECK-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr ir<%p>, vp<[[ST]]>
-; CHECK-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
+; CHECK-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>, ir<0>
 ; CHECK-NEXT: WIDEN ir<[[V:%.+]]> = load vp<[[PTR1]]>
 ; CHECK-NEXT: CLONE ir<[[OFFSET:.+]]> = add vp<[[ST]]>, ir<100>
 ; CHECK-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr ir<%p>, ir<[[OFFSET]]>
-; CHECK-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
+; CHECK-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>, ir<0>
 ; CHECK-NEXT: WIDEN store vp<[[PTR2]]>, ir<[[V]]>
 ; CHECK-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]>
diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
index 41249c595f9eb..f964fc6f67854 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
@@ -86,12 +86,12 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 {
 ; AUTO_VEC-NEXT: [[CMP_N9:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]]
 ; AUTO_VEC-NEXT: br i1 [[CMP_N9]], label %[[FOR_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
 ; AUTO_VEC: [[VEC_EPILOG_SCALAR_PH]]:
-; AUTO_VEC-NEXT: [[BC_RESUME_VAL10:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
-; AUTO_VEC-NEXT: [[BC_RESUME_VAL11:%.*]] = phi float [ [[TMP10]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[ITER_CHECK]] ]
+; AUTO_VEC-NEXT: [[BC_RESUME_VAL12:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; AUTO_VEC-NEXT: [[BC_RESUME_VAL13:%.*]] = phi float [ [[TMP10]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[ITER_CHECK]] ]
 ; AUTO_VEC-NEXT: br label %[[LOOP:.*]]
 ; AUTO_VEC: [[LOOP]]:
-; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL10]], %[[VEC_EPILOG_SCALAR_PH]] ]
-; AUTO_VEC-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL11]], %[[VEC_EPILOG_SCALAR_PH]] ]
+; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL12]], %[[VEC_EPILOG_SCALAR_PH]] ]
+; AUTO_VEC-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL13]], %[[VEC_EPILOG_SCALAR_PH]] ]
 ; AUTO_VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; AUTO_VEC-NEXT: store float [[X_06]], ptr [[ARRAYIDX]], align 4
 ; AUTO_VEC-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
@@ -225,11 +225,11 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) {
 ; AUTO_VEC-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
 ; AUTO_VEC: [[SCALAR_PH]]:
 ; AUTO_VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; AUTO_VEC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi double [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
+; AUTO_VEC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi double [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
 ; AUTO_VEC-NEXT: br label %[[LOOP:.*]]
 ; AUTO_VEC: [[LOOP]]:
 ; AUTO_VEC-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
-; AUTO_VEC-NEXT: [[J:%.*]] = phi double [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[J_NEXT:%.*]], %[[LOOP]] ]
+; AUTO_VEC-NEXT: [[J:%.*]] = phi double [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[J_NEXT:%.*]], %[[LOOP]] ]
 ; AUTO_VEC-NEXT: [[T0:%.*]] = getelementptr double, ptr [[A]], i64 [[I]]
 ; AUTO_VEC-NEXT: store double [[J]], ptr [[T0]], align 8
 ; AUTO_VEC-NEXT: [[I_NEXT]] = add i64 [[I]], 1
@@ -381,12 +381,12 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) {
 ; AUTO_VEC-NEXT: [[CMP_N18:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC6]]
 ; AUTO_VEC-NEXT: br i1 [[CMP_N18]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
 ; AUTO_VEC: [[VEC_EPILOG_SCALAR_PH]]:
-; AUTO_VEC-NEXT: [[BC_RESUME_VAL14:%.*]] = phi i64 [ [[N_VEC6]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
-; AUTO_VEC-NEXT: [[BC_RESUME_VAL15:%.*]] = phi float [ [[TMP18]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[ITER_CHECK]] ]
+; AUTO_VEC-NEXT: [[BC_RESUME_VAL16:%.*]] = phi i64 [ [[N_VEC6]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; AUTO_VEC-NEXT: [[BC_RESUME_VAL17:%.*]] = phi float [ [[TMP18]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[ITER_CHECK]] ]
 ; AUTO_VEC-NEXT: br label %[[LOOP:.*]]
 ; AUTO_VEC: [[LOOP]]:
-; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL14]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ]
-; AUTO_VEC-NEXT: [[X_012:%.*]] = phi float [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD3:%.*]], %[[LOOP]] ]
+; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL16]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ]
+; AUTO_VEC-NEXT: [[X_012:%.*]] = phi float [ [[BC_RESUME_VAL17]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD3:%.*]], %[[LOOP]] ]
 ; AUTO_VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[P]], i64 [[INDVARS_IV]]
 ; AUTO_VEC-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; AUTO_VEC-NEXT: [[ADD:%.*]] = fadd reassoc float [[X_012]], [[TMP16]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll
index da48f984cb329..7f7d0be099601 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll
@@ -222,14 +222,14 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
 ; CHECK-NEXT: br i1 true, label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]]
 ; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX13:%.*]] = phi i32 [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP149]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX15:%.*]] = phi i32 [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP149]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
 ; CHECK-NEXT: br label %[[FOR_BODY:.*]]
 ; CHECK: [[FOR_COND_CLEANUP]]:
 ; CHECK-NEXT: [[ADD7_LCSSA:%.*]] = phi i32 [ [[ADD7:%.*]], %[[FOR_BODY]] ], [ [[TMP149]], %[[MIDDLE_BLOCK]] ], [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ]
 ; CHECK-NEXT: ret i32 [[ADD7_LCSSA]]
 ; CHECK: [[FOR_BODY]]:
 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
-; CHECK-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX13]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD7]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD7]], %[[FOR_BODY]] ]
 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT: [[TMP150:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !tbaa [[INT_TBAA1]]
 ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDVARS_IV]], i64 [[IDXPROM5]]
@@ -453,14 +453,14 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
 ; MAX-BW-NEXT: br i1 true, label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]]
 ; MAX-BW: [[VEC_EPILOG_SCALAR_PH]]:
 ; MAX-BW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
-; MAX-BW-NEXT: [[BC_MERGE_RDX13:%.*]] = phi i32 [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP149]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; MAX-BW-NEXT: [[BC_MERGE_RDX15:%.*]] = phi i32 [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP149]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
 ; MAX-BW-NEXT: br label %[[FOR_BODY:.*]]
 ; MAX-BW: [[FOR_COND_CLEANUP]]:
 ; MAX-BW-NEXT: [[ADD7_LCSSA:%.*]] = phi i32 [ [[ADD7:%.*]], %[[FOR_BODY]] ], [ [[TMP149]], %[[MIDDLE_BLOCK]] ], [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ]
 ; MAX-BW-NEXT: ret i32 [[ADD7_LCSSA]]
 ; MAX-BW: [[FOR_BODY]]:
 ; MAX-BW-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
-; MAX-BW-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX13]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD7]], %[[FOR_BODY]] ]
+; MAX-BW-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD7]], %[[FOR_BODY]] ]
 ; MAX-BW-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDVARS_IV]]
 ; MAX-BW-NEXT: [[TMP150:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !tbaa [[INT_TBAA1]]
 ; MAX-BW-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDVARS_IV]], i64 [[IDXPROM5]]
diff --git a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll
index 6e5213568c735..cf4342d8f3885 100644
--- a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll
+++ b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll
@@ -44,16 +44,16 @@ define i64 @dead_instructions_01(ptr %a, i64 %n) {
 ; CHECK-NEXT: br label %[[FOR_BODY:.*]]
 ; CHECK: [[FOR_BODY]]:
 ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
-; CHECK-NEXT: [[R:%.*]] = phi i64 [ [[TMP6:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
+; CHECK-NEXT: [[R:%.*]] = phi i64 [ [[TMP7:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I]]
-; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6]] = add i64 [[TMP5]], [[R]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP2]], align 8
+; CHECK-NEXT: [[TMP7]] = add i64 [[TMP6]], [[R]]
 ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
 ; CHECK-NEXT: br i1 [[COND]], label %[[FOR_BODY]], label %[[FOR_END]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK: [[FOR_END]]:
-; CHECK-NEXT: [[TMP7:%.*]] = phi i64 [ [[TMP6]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ]
-; CHECK-NEXT: ret i64 [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = phi i64 [ [[TMP7]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i64 [[TMP8]]
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll
index 0d9d28d079b92..af26de9403903 100644
--- a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll
+++ b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll
@@ -164,12 +164,12 @@ define float @test_fmax_and_fmin(ptr %src.0, ptr %src.1, i64 %n) {
 ; CHECK: [[SCALAR_PH]]:
 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP27]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP28]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX8:%.*]] = phi float [ [[TMP29]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX9:%.*]] = phi float [ [[TMP29]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
 ; CHECK-NEXT: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
 ; CHECK-NEXT: [[MIN:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[MAX:%.*]] = phi float [ [[BC_MERGE_RDX8]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi float [ [[BC_MERGE_RDX9]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
 ; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_0]], i64 [[IV1]]
 ; CHECK-NEXT: [[GEP_SRC_3:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_1]], i64 [[IV1]]
 ; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[GEP_SRC_2]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll
index a071949f82062..f1f1c49e8036d 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll
@@ -1519,12 +1519,12 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 %
 ; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
 ; CHECK-VF4IC4: [[SCALAR_PH]]:
 ; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ -9223372036854775807, %[[ENTRY]] ]
-; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL18:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
 ; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
 ; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
 ; CHECK-VF4IC4: [[FOR_BODY]]:
 ; CHECK-VF4IC4-NEXT: [[IV_J:%.*]] = phi i64 [ [[INC3:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
-; CHECK-VF4IC4-NEXT: [[IV_I:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL13]], %[[SCALAR_PH]] ]
+; CHECK-VF4IC4-NEXT: [[IV_I:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL18]], %[[SCALAR_PH]] ]
 ; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
 ; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_I]]
 ; CHECK-VF4IC4-NEXT: [[TMP21:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll
index d140bc09fe731..627e0910e6266 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll
@@ -11,16 +11,15 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b)
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP4]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1600, [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1600, [[N_MOD_VF]]
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 1
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP4]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x float>, ptr [[TMP6]], align 4
 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <vscale x 2 x float> [[WIDE_LOAD]], i32 0
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 2 x float>, ptr [[TMP9]], align 4
@@ -32,9 +31,7 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b)
 ; CHECK-NEXT: [[TMP14:%.*]] = fadd <vscale x 2 x float> [[WIDE_LOAD]], splat (float 1.000000e+00)
 ; CHECK-NEXT: [[TMP15:%.*]] = fadd <vscale x 2 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00)
 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 1
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP18]]
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP4]]
 ; CHECK-NEXT: store <vscale x 2 x float> [[TMP14]], ptr [[TMP16]], align 4
 ; CHECK-NEXT: store <vscale x 2 x float> [[TMP15]], ptr [[TMP19]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
@@ -85,7 +82,8 @@ define void @test2(ptr %a, ptr noalias %b) {
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP6]], 2
+; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP3]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1600, [[TMP7]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1600, [[N_MOD_VF]]
 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
@@ -94,17 +92,13 @@ define void @test2(ptr %a, ptr noalias %b) {
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 1
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[TMP3]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x float>, ptr [[TMP10]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 2 x float>, ptr [[TMP13]], align 4
 ; CHECK-NEXT: [[TMP14:%.*]] = fadd <vscale x 2 x float> [[WIDE_LOAD]], splat (float 1.000000e+00)
 ; CHECK-NEXT: [[TMP15:%.*]] = fadd <vscale x 2 x float> [[WIDE_LOAD3]], splat (float 1.000000e+00)
 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 1
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP18]]
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP3]]
 ; CHECK-NEXT: store <vscale x 2 x float> [[TMP14]], ptr [[TMP16]], align 4
 ; CHECK-NEXT: store <vscale x 2 x float> [[TMP15]], ptr [[TMP19]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
@@ -173,17 +167,13 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no
 ; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP9]], <vscale x 2 x float> splat (float 2.300000e+01), <vscale x 2 x float> splat (float 4.200000e+01)
 ; CHECK-NEXT: [[PREDPHI1:%.*]] = select <vscale x 2 x i1> [[TMP10]], <vscale x 2 x float> splat (float 2.300000e+01), <vscale x 2 x float> splat (float 4.200000e+01)
 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 1
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP13]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP5]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x float>, ptr [[TMP11]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 2 x float>, ptr [[TMP14]], align 4
 ; CHECK-NEXT: [[TMP15:%.*]] = fmul <vscale x 2 x float> [[PREDPHI]], [[WIDE_LOAD]]
 ; CHECK-NEXT: [[TMP16:%.*]] = fmul <vscale x 2 x float> [[PREDPHI1]], [[WIDE_LOAD2]]
 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 1
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP19]]
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP5]]
 ; CHECK-NEXT: store <vscale x 2 x float> [[TMP15]], ptr [[TMP17]], align 4
 ; CHECK-NEXT: store <vscale x 2 x float> [[TMP16]], ptr [[TMP20]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
index 1216bc1dc33cc..2315f3d092d7e 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
@@ -96,7 +96,8 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) {
 ; CHECK-VF4UF2-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; CHECK-VF4UF2: [[VECTOR_PH]]:
 ; CHECK-VF4UF2-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 8
+; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP10]], 4
+; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = mul i64 [[TMP12]], 2
 ; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP11]]
 ; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
 ; CHECK-VF4UF2-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
@@ -109,9 +110,7 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) {
 ; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[WIDE_LOAD3:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4UF2-NEXT: [[TMP17:%.*]] = add nuw nsw i64 [[INDEX]], 1
 ; CHECK-VF4UF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP17]]
-; CHECK-VF4UF2-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF2-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2
-; CHECK-VF4UF2-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i64 [[TMP21]]
+; CHECK-VF4UF2-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i64 [[TMP12]]
 ; CHECK-VF4UF2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP18]], align 4
 ; CHECK-VF4UF2-NEXT: [[WIDE_LOAD3]] = load <vscale x 4 x i32>, ptr [[TMP22]], align 4
 ; CHECK-VF4UF2-NEXT: [[TMP23:%.*]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> [[VECTOR_RECUR]], <vscale x 4 x i32> [[WIDE_LOAD]], i32 -1)
@@ -119,9 +118,7 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) {
 ; CHECK-VF4UF2-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
 ; CHECK-VF4UF2-NEXT: [[TMP26:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], [[TMP23]]
 ; CHECK-VF4UF2-NEXT: [[TMP27:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD3]], [[TMP24]]
-; CHECK-VF4UF2-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF2-NEXT: [[TMP30:%.*]] = shl nuw i64 [[TMP29]], 2
-; CHECK-VF4UF2-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[TMP30]]
+; CHECK-VF4UF2-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[TMP12]]
 ; CHECK-VF4UF2-NEXT: store <vscale x 4 x i32> [[TMP26]], ptr [[TMP25]], align 4
 ; CHECK-VF4UF2-NEXT: store <vscale x 4 x i32> [[TMP27]], ptr [[TMP31]], align 4
 ; CHECK-VF4UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
@@ -251,7 +248,8 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) {
 ; CHECK-VF4UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK-VF4UF2: [[VECTOR_PH]]:
 ; CHECK-VF4UF2-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
+; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP3]], 4
+; CHECK-VF4UF2-NEXT: [[TMP4:%.*]] = mul i64 [[TMP11]], 2
 ; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
 ; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
 ; CHECK-VF4UF2-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
@@ -265,9 +263,7 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) {
 ; CHECK-VF4UF2-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4UF2-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4UF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
-; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF2-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 2
-; CHECK-VF4UF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP13]]
+; CHECK-VF4UF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP11]]
 ; CHECK-VF4UF2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP10]], align 4
 ; CHECK-VF4UF2-NEXT: [[WIDE_LOAD2]] = load <vscale x 4 x i32>, ptr [[TMP14]], align 4
 ; CHECK-VF4UF2-NEXT: [[TMP15:%.*]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> [[VECTOR_RECUR]], <vscale x 4 x i32> [[WIDE_LOAD]], i32 -1)
@@ -451,7 +447,8 @@ define void @recurrence_3(ptr nocapture readonly %a, ptr nocapture %b, i32 %n, f
 ; CHECK-VF4UF2-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; CHECK-VF4UF2: [[VECTOR_PH]]:
 ; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 8
+; CHECK-VF4UF2-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP11]], 4
+; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = mul i64 [[TMP13]], 2
 ; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP12]]
 ; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
 ; CHECK-VF4UF2-NEXT: [[TMP15:%.*]] = add i64 1, [[N_VEC]]
@@ -467,11 +464,9 @@ define void @recurrence_3(ptr nocapture readonly %a, ptr nocapture %b, i32 %n, f
 ; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i16> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[WIDE_LOAD4:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4UF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
 ; CHECK-VF4UF2-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[OFFSET_IDX]]
-; CHECK-VF4UF2-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF2-NEXT: [[TMP22:%.*]] = shl nuw i64 [[TMP21]], 2
-; CHECK-VF4UF2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i16, ptr [[TMP19]], i64 [[TMP22]]
+; CHECK-VF4UF2-NEXT: [[TMP20:%.*]] = getelementptr inbounds i16, ptr [[TMP19]], i64 [[TMP13]]
 ; CHECK-VF4UF2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i16>, ptr [[TMP19]], align 2, !alias.scope [[META6:![0-9]+]]
-; CHECK-VF4UF2-NEXT: [[WIDE_LOAD4]] = load <vscale x 4 x i16>, ptr [[TMP23]], align 2, !alias.scope [[META6]]
+; CHECK-VF4UF2-NEXT: [[WIDE_LOAD4]] = load <vscale x 4 x i16>, ptr [[TMP20]], align 2, !alias.scope [[META6]]
 ; CHECK-VF4UF2-NEXT: [[TMP24:%.*]] = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> [[VECTOR_RECUR]], <vscale x 4 x i16> [[WIDE_LOAD]], i32 -1)
 ; CHECK-VF4UF2-NEXT: [[TMP25:%.*]] = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> [[WIDE_LOAD]], <vscale x 4 x i16> [[WIDE_LOAD4]], i32 -1)
 ; CHECK-VF4UF2-NEXT: [[TMP26:%.*]] = sitofp <vscale x 4 x i16> [[WIDE_LOAD]] to <vscale x 4 x double>
@@ -483,11 +478,9 @@ define void @recurrence_3(ptr nocapture readonly %a, ptr nocapture %b, i32 %n, f
 ; CHECK-VF4UF2-NEXT: [[TMP32:%.*]] = fsub fast <vscale x 4 x double> [[TMP26]], [[TMP30]]
 ; CHECK-VF4UF2-NEXT: [[TMP33:%.*]] = fsub fast <vscale x 4 x double> [[TMP27]], [[TMP31]]
 ; CHECK-VF4UF2-NEXT: [[TMP34:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[OFFSET_IDX]]
-; CHECK-VF4UF2-NEXT: [[TMP36:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF2-NEXT: [[TMP37:%.*]] = shl nuw i64 [[TMP36]], 2
-; CHECK-VF4UF2-NEXT: [[TMP38:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i64 [[TMP37]]
+; CHECK-VF4UF2-NEXT: [[TMP35:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i64 [[TMP13]]
 ; CHECK-VF4UF2-NEXT: store <vscale x 4 x double> [[TMP32]], ptr [[TMP34]], align 8, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
-; CHECK-VF4UF2-NEXT: store <vscale x 4 x double> [[TMP33]], ptr [[TMP38]], align 8, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4UF2-NEXT: store <vscale x 4 x double> [[TMP33]], ptr [[TMP35]], align 8, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF4UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
 ; CHECK-VF4UF2-NEXT: [[TMP39:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4UF2-NEXT: br i1 [[TMP39]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
@@ -809,7 +802,8 @@ define void @sink_after(ptr %a, ptr %b, i64 %n) {
 ; CHECK-VF4UF2-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; CHECK-VF4UF2: [[VECTOR_PH]]:
 ; CHECK-VF4UF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF2-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8
+; CHECK-VF4UF2-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP5]], 4
+; CHECK-VF4UF2-NEXT: [[TMP6:%.*]] = mul i64 [[TMP7]], 2
 ; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
 ; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-VF4UF2-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
@@ -822,11 +816,9 @@ define void @sink_after(ptr %a, ptr %b, i64 %n) {
 ; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i16> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[WIDE_LOAD3:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = add nuw nsw i64 [[INDEX]], 1
 ; CHECK-VF4UF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP12]]
-; CHECK-VF4UF2-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF2-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 2
-; CHECK-VF4UF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP13]], i64 [[TMP16]]
+; CHECK-VF4UF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP13]], i64 [[TMP7]]
 ; CHECK-VF4UF2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i16>, ptr [[TMP13]], align 2, !alias.scope [[META17:![0-9]+]]
-; CHECK-VF4UF2-NEXT: [[WIDE_LOAD3]] = load <vscale x 4 x i16>, ptr [[TMP17]], align 2, !alias.scope [[META17]]
+; CHECK-VF4UF2-NEXT: [[WIDE_LOAD3]] = load <vscale x 4 x i16>, ptr [[TMP14]], align 2, !alias.scope [[META17]]
 ; CHECK-VF4UF2-NEXT: [[TMP18:%.*]] = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> [[VECTOR_RECUR]], <vscale x 4 x i16> [[WIDE_LOAD]], i32 -1)
 ; CHECK-VF4UF2-NEXT: [[TMP19:%.*]] = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> [[WIDE_LOAD]], <vscale x 4 x i16> [[WIDE_LOAD3]], i32 -1)
 ; CHECK-VF4UF2-NEXT: [[TMP20:%.*]] = sext <vscale x 4 x i16> [[TMP18]] to <vscale x 4 x i32>
@@ -836,11 +828,9 @@ define void @sink_after(ptr %a, ptr %b, i64 %n) {
 ; CHECK-VF4UF2-NEXT: [[TMP24:%.*]] = mul nsw <vscale x 4 x i32> [[TMP22]], [[TMP20]]
 ; CHECK-VF4UF2-NEXT: [[TMP25:%.*]] = mul nsw <vscale x 4 x i32> [[TMP23]], [[TMP21]]
 ; CHECK-VF4UF2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
-; CHECK-VF4UF2-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF2-NEXT: [[TMP29:%.*]] = shl nuw i64 [[TMP28]], 2
-; CHECK-VF4UF2-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[TMP29]]
+; CHECK-VF4UF2-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[TMP7]]
 ; CHECK-VF4UF2-NEXT: store <vscale x 4 x i32> [[TMP24]], ptr [[TMP26]], align 4, !alias.scope [[META20:![0-9]+]], !noalias [[META17]]
-; CHECK-VF4UF2-NEXT: store <vscale x 4 x i32> [[TMP25]], ptr [[TMP30]], align 4, !alias.scope [[META20]], !noalias [[META17]]
+; CHECK-VF4UF2-NEXT: store <vscale x 4 x i32> [[TMP25]], ptr [[TMP27]], align 4, !alias.scope [[META20]], !noalias [[META17]]
 ; CHECK-VF4UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
 ; CHECK-VF4UF2-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4UF2-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
index d87d39e684993..3c88c0eb350e1 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
@@ -29,17 +29,13 @@ define void @add_ind64_unrolled(ptr noalias nocapture %a, ptr noalias nocapture
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[DOTIDX:%.*]] = shl i64 [[TMP10]], 4
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[DOTIDX]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i64 [[TMP8]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP9]], align 8
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 2 x i64>, ptr [[TMP11]], align 8
 ; CHECK-NEXT: [[TMP12:%.*]] = add nsw <vscale x 2 x i64> [[WIDE_LOAD]], [[VEC_IND]]
 ; CHECK-NEXT: [[TMP13:%.*]] = add nsw <vscale x 2 x i64> [[WIDE_LOAD2]], [[STEP_ADD]]
 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[DOTIDX3:%.*]] = shl i64 [[TMP15]], 4
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 [[DOTIDX3]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i64 [[TMP8]]
 ; CHECK-NEXT: store <vscale x 2 x i64> [[TMP12]], ptr [[TMP14]], align 8
 ; CHECK-NEXT: store <vscale x 2 x i64> [[TMP13]], ptr [[TMP16]], align 8
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
@@ -107,15 +103,13 @@ define void @add_ind64_unrolled_nxv1i64(ptr noalias nocapture %a, ptr noalias no
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 1 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i64 [[TMP9]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i64 [[TMP2]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP8]], align 8
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 1 x i64>, ptr [[TMP10]], align 8
 ; CHECK-NEXT: [[TMP11:%.*]] = add nsw <vscale x 1 x i64> [[WIDE_LOAD]], [[VEC_IND]]
 ; CHECK-NEXT: [[TMP12:%.*]] = add nsw <vscale x 1 x i64> [[WIDE_LOAD2]], [[STEP_ADD]]
 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP14]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP2]]
 ; CHECK-NEXT: store <vscale x 1 x i64> [[TMP11]], ptr [[TMP13]], align 8
 ; CHECK-NEXT: store <vscale x 1 x i64> [[TMP12]], ptr [[TMP15]], align 8
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
index cb61fc6e0a046..e8575aba6003d 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
@@ -30,9 +30,8 @@ define i32 @iv_live_out_wide(ptr %dst) {
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[INDEX]]
-; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 1
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP13]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP5]] to i64
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP9]]
 ; CHECK-NEXT: store <vscale x 2 x i16> zeroinitializer, ptr [[TMP10]], align 2
 ; CHECK-NEXT: store <vscale x 2 x i16> zeroinitializer, ptr [[TMP14]], align 2
 ; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 2 x i32> [[BROADCAST_SPLAT]], [[STEP_ADD]]
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
index bdc8734c45f2e..2b5875cdcf503 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
@@ -42,24 +42,21 @@ define void @loop(i64 %N, ptr noalias %a, ptr noalias %b) {
 ; CHECKUF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECKUF2: [[VECTOR_PH]]:
 ; CHECKUF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECKUF2-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8
+; CHECKUF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 4
+; CHECKUF2-NEXT: [[TMP6:%.*]] = mul i64 [[TMP3]], 2
 ; CHECKUF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
 ; CHECKUF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECKUF2-NEXT: br label %[[VECTOR_BODY:.*]]
 ; CHECKUF2: [[VECTOR_BODY]]:
 ; CHECKUF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECKUF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds
double, ptr [[B]], i64 [[INDEX]] -; CHECKUF2-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECKUF2-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP8]], 2 -; CHECKUF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[TMP7]], i64 [[TMP16]] +; CHECKUF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[TMP7]], i64 [[TMP3]] ; CHECKUF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 8 ; CHECKUF2-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP9]], align 8 ; CHECKUF2-NEXT: [[TMP10:%.*]] = fadd [[WIDE_LOAD]], splat (double 1.000000e+00) ; CHECKUF2-NEXT: [[TMP11:%.*]] = fadd [[WIDE_LOAD3]], splat (double 1.000000e+00) ; CHECKUF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]] -; CHECKUF2-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECKUF2-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP13]], 2 -; CHECKUF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[TMP12]], i64 [[TMP17]] +; CHECKUF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[TMP12]], i64 [[TMP3]] ; CHECKUF2-NEXT: store [[TMP10]], ptr [[TMP12]], align 8 ; CHECKUF2-NEXT: store [[TMP11]], ptr [[TMP14]], align 8 ; CHECKUF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll index 83f1d30c64321..d30f1b251a994 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll @@ -12,7 +12,8 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 16 +; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP2]], 8 +; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP4]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 256, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 256, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -23,9 +24,8 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP14:%.*]] = and [[VEC_PHI]], splat (i32 255) ; CHECK-NEXT: [[TMP15:%.*]] = and [[VEC_PHI1]], splat (i32 255) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 3 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[TMP9]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP12]], align 4 ; CHECK-NEXT: [[TMP26:%.*]] = zext [[WIDE_LOAD]] to diff --git a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll index ee3564bc87be4..c61a0bb5a9f66 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll @@ -42,11 +42,11 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw ; CHECK-NEXT: " EMIT vp\<[[CAN_IV:%.+]]\> = CANONICAL-INDUCTION ir\<0\>, vp\<[[CAN_IV_NEXT:%.+]]\>\l" + ; CHECK-NEXT: " vp\<[[STEPS:%.+]]\> = SCALAR-STEPS vp\<[[CAN_IV]]\>, ir\<1\>, vp\<[[VF]]\>\l" + ; CHECK-NEXT: " CLONE ir\<%arrayidx\> = getelementptr 
-; CHECK-NEXT: " vp\<[[VEC_PTR:%.+]]\> = vector-pointer inbounds ir\<%arrayidx\>\l" +
+; CHECK-NEXT: " vp\<[[VEC_PTR:%.+]]\> = vector-pointer inbounds ir\<%arrayidx\>, ir\<0\>\l" +
 ; CHECK-NEXT: " WIDEN ir\<%lv\> = load vp\<[[VEC_PTR]]\>\l" +
 ; CHECK-NEXT: " WIDEN-INTRINSIC ir\<%call\> = call llvm.sqrt(ir\<%lv\>)\l" +
 ; CHECK-NEXT: " CLONE ir\<%arrayidx2\> = getelementptr inbounds ir\<%x\>, vp\<[[STEPS]]\>\l" +
-; CHECK-NEXT: " vp\<[[VEC_PTR2:%.+]]\> = vector-pointer inbounds ir\<%arrayidx2\>\l" +
+; CHECK-NEXT: " vp\<[[VEC_PTR2:%.+]]\> = vector-pointer inbounds ir\<%arrayidx2\>, ir\<0\>\l" +
 ; CHECK-NEXT: " WIDEN store vp\<[[VEC_PTR2]]\>, ir\<%call\>\l" +
 ; CHECK-NEXT: " EMIT vp\<[[CAN_IV_NEXT]]\> = add nuw vp\<[[CAN_IV]]\>, vp\<[[VFxUF]]\>\l" +
 ; CHECK-NEXT: " EMIT branch-on-count vp\<[[CAN_IV_NEXT]]\>, vp\<[[VEC_TC]]\>\l" +
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
index 2c260ef9f4963..df88bcf5b25aa 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
@@ -21,7 +21,7 @@ define void @iv_no_binary_op_in_descriptor(i1 %c, ptr %dst) {
 ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
 ; CHECK-NEXT: CLONE ir<%gep> = getelementptr inbounds ir<%dst>, vp<[[STEPS:%.+]]>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer inbounds ir<%gep>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer inbounds ir<%gep>, ir<0>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%iv>
 ; CHECK-NEXT: EMIT vp<[[CAN_INC:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_INC]]>, vp<[[VEC_TC]]>
@@ -77,10 +77,10 @@ define void @iv_expand(ptr %p, i64 %n) {
 ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0>
 ; CHECK-NEXT: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>
 ; CHECK-NEXT: CLONE ir<%q> = getelementptr ir<%p>, vp<%4>
-; CHECK-NEXT: vp<%5> = vector-pointer ir<%q>
+; CHECK-NEXT: vp<%5> = vector-pointer ir<%q>, ir<0>
 ; CHECK-NEXT: WIDEN ir<%x> = load vp<%5>
 ; CHECK-NEXT: WIDEN ir<%y> = add ir<%x>, ir<%iv>
-; CHECK-NEXT: vp<%6> = vector-pointer ir<%q>
+; CHECK-NEXT: vp<%6> = vector-pointer ir<%q>, ir<0>
 ; CHECK-NEXT: WIDEN store vp<%6>, ir<%y>
 ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1>
 ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%2>
@@ -110,9 +110,10 @@ define void @iv_expand(ptr %p, i64 %n) {
 ; CHECK-NEXT: EMIT-SCALAR vp<[[SCALAR_PHI:%.+]]> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
 ; CHECK-NEXT: WIDEN-PHI ir<%iv> = phi [ vp<[[INDUCTION]]>, vector.ph ], [ vp<%vec.ind.next>, vector.body ]
 ; CHECK-NEXT: CLONE ir<%q> = getelementptr ir<%p>, vp<[[SCALAR_PHI]]>
-; CHECK-NEXT: WIDEN ir<%x> = load ir<%q>
+; CHECK-NEXT: vp<%8> = vector-pointer ir<%q>, ir<0>
+; CHECK-NEXT: WIDEN ir<%x> = load vp<%8>
 ; CHECK-NEXT: WIDEN ir<%y> = add ir<%x>, ir<%iv>
-; CHECK-NEXT: WIDEN store ir<%q>, ir<%y>
+; CHECK-NEXT: WIDEN store vp<%8>, ir<%y>
 ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[SCALAR_PHI]]>, ir<8>
 ; CHECK-NEXT: EMIT vp<%vec.ind.next> = add ir<%iv>, vp<[[BROADCAST_INC]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%n.vec>
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
index cf85f26992c2f..741499efa811a 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
@@ -27,7 +27,8 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
 ; CHECK-NEXT: EMIT vp<[[STEP2:%.+]]> = extractelement vp<[[STEPS]]>, ir<1>
 ; CHECK-NEXT: EMIT vp<[[PTR]]>.1 = ptradd ir<%start>, vp<[[STEP2]]>
 ; CHECK-NEXT: EMIT vp<[[PTR_VEC:%.+]]> = buildvector vp<[[PTR]]>, vp<[[PTR]]>.1
-; CHECK-NEXT: WIDEN ir<%l> = load vp<[[PTR]]>
+; CHECK-NEXT: vp<[[VPTR:%.+]]> = vector-pointer vp<[[PTR]]>, ir<0>
+; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VPTR]]>
 ; CHECK-NEXT: EMIT vp<[[C1:%.+]]> = icmp eq ir<%l>, ir<-12>
 ; CHECK-NEXT: EMIT vp<[[C2:%.+]]> = icmp eq ir<%l>, ir<13>
 ; CHECK-NEXT: EMIT vp<[[OR_CASES:%.+]]> = or vp<[[C1]]>, vp<[[C2]]>
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
index 37cb1d2331d48..8a713d7d20af1 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
@@ -27,10 +27,10 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
 ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: EMIT vp<[[PADD:%.+]]> = ptradd ir<%A>, vp<[[STEPS]]>
-; CHECK-NEXT: vp<[[VPTR:%.]]> = vector-pointer vp<[[PADD]]>
+; CHECK-NEXT: vp<[[VPTR:%.]]> = vector-pointer vp<[[PADD]]>, ir<0>
 ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VPTR]]>
 ; CHECK-NEXT: WIDEN ir<%add> = add nsw ir<%l>, ir<10>
-; CHECK-NEXT: vp<[[VPTR2:%.+]]> = vector-pointer vp<[[PADD]]>
+; CHECK-NEXT: vp<[[VPTR2:%.+]]> = vector-pointer vp<[[PADD]]>, ir<0>
 ; CHECK-NEXT: WIDEN store vp<[[VPTR2]]>, ir<%add>
 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV:%.+]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
@@ -69,12 +69,13 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
 ; CHECK-NEXT: Successor(s): vector.body
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: vp<[[VPTR2:%.]]> = vector-pointer ir<%A>, ir<1>
-; CHECK-NEXT: WIDEN ir<%l> = load ir<%A>
+; CHECK-NEXT: vp<[[VPTR1:%.]]> = vector-pointer ir<%A>, ir<0>
+; CHECK-NEXT: vp<[[VPTR2:%.]]> = vector-pointer ir<%A>, ir<8>
+; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VPTR1]]>
 ; CHECK-NEXT: WIDEN ir<%l>.1 = load vp<[[VPTR2]]>
 ; CHECK-NEXT: WIDEN ir<%add> = add nsw ir<%l>, ir<10>
 ; CHECK-NEXT: WIDEN ir<%add>.1 = add nsw ir<%l>.1, ir<10>
-; CHECK-NEXT: WIDEN store ir<%A>, ir<%add>
+; CHECK-NEXT: WIDEN store vp<[[VPTR1]]>, ir<%add>
 ; CHECK-NEXT: WIDEN store vp<[[VPTR2]]>, ir<%add>.1
 ; CHECK-NEXT: Successor(s): middle.block
 ; CHECK-EMPTY:
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
index 0ad720825336b..22025e251278d 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
@@ -539,9 +539,11 @@ define i32 @print_mulacc_sub(ptr %a, ptr %b) {
 ; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
 ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi ir<0>, ir<%add>
 ; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<%index>
-; CHECK-NEXT: WIDEN ir<%load.a> = load ir<%gep.a>
+; CHECK-NEXT: vp<%1> = vector-pointer ir<%gep.a>, ir<0>
+; CHECK-NEXT: WIDEN ir<%load.a> = load vp<%1>
 ; CHECK-NEXT: CLONE ir<%gep.b> = getelementptr ir<%b>, vp<%index>
-; CHECK-NEXT: WIDEN ir<%load.b> = load ir<%gep.b>
+; CHECK-NEXT: vp<%2> = vector-pointer ir<%gep.b>, ir<0>
+; CHECK-NEXT: WIDEN ir<%load.b> = load vp<%2>
 ; CHECK-NEXT: WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32
 ; CHECK-NEXT: WIDEN-CAST ir<%ext.a> = zext ir<%load.a> to i32
 ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a>
@@ -658,9 +660,11 @@ define i32 @print_mulacc_negated(ptr %a, ptr %b) {
 ; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
 ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi ir<0>, ir<%add>
 ; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<%index>
-; CHECK-NEXT: WIDEN ir<%load.a> = load ir<%gep.a>
+; CHECK-NEXT: vp<%1> = vector-pointer ir<%gep.a>, ir<0>
+; CHECK-NEXT: WIDEN ir<%load.a> = load vp<%1>
 ; CHECK-NEXT: CLONE ir<%gep.b> = getelementptr ir<%b>, vp<%index>
-; CHECK-NEXT: WIDEN ir<%load.b> = load ir<%gep.b>
+; CHECK-NEXT: vp<%2> = vector-pointer ir<%gep.b>, ir<0>
+; CHECK-NEXT: WIDEN ir<%load.b> = load vp<%2>
 ; CHECK-NEXT: WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32
 ; CHECK-NEXT: WIDEN-CAST ir<%ext.a> = zext ir<%load.a> to i32
 ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a>
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
index 7c562be8f65af..0a2c0ee35e4fe 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -25,11 +25,11 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw
 ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[STEPS]]>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer inbounds ir<%arrayidx>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer inbounds ir<%arrayidx>, ir<0>
 ; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT: WIDEN-INTRINSIC ir<%call> = call llvm.sqrt(ir<%lv>)
 ; CHECK-NEXT: CLONE ir<%arrayidx2> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
-; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer inbounds ir<%arrayidx2>
+; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer inbounds ir<%arrayidx2>, ir<0>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%call>
 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
@@ -94,13 +94,13 @@ define void @print_widen_gep_and_select(i64 %n, ptr noalias %y, ptr noalias %x,
 ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
 ; CHECK-NEXT: WIDEN-GEP Inv[Var] ir<%arrayidx> = getelementptr inbounds ir<%y>, ir<%iv>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer inbounds ir<%arrayidx>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer inbounds ir<%arrayidx>, ir<0>
 ; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT: WIDEN ir<%cmp> = icmp eq ir<%arrayidx>, ir<%z>
 ; CHECK-NEXT: WIDEN-SELECT ir<%sel> = select ir<%cmp>, ir<1.000000e+01>, ir<2.000000e+01>
 ; CHECK-NEXT: WIDEN ir<%add> = fadd ir<%lv>, ir<%sel>
 ; CHECK-NEXT: CLONE ir<%arrayidx2> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
-; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer inbounds ir<%arrayidx2>
+; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer inbounds ir<%arrayidx2>, ir<0>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add>
 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
@@ -189,7 +189,7 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) {
 ; CHECK-NEXT: if.then.0:
 ; CHECK-NEXT: BLEND ir<%d> = ir<0> vp<[[PRED]]>/ir<%cmp>
 ; CHECK-NEXT: CLONE ir<%idx> = getelementptr ir<%x>, vp<[[STEPS]]>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%idx>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%idx>, ir<0>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%d>
 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
@@ -348,7 +348,7 @@ define void @recipe_debug_loc_location(ptr nocapture %src) !dbg !5 {
 ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%isd> = getelementptr inbounds ir<%src>, vp<[[STEPS]]>, !dbg /tmp/s.c:5:3
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer inbounds ir<%isd>, !dbg /tmp/s.c:6:3
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer inbounds ir<%isd>, ir<0>, !dbg /tmp/s.c:6:3
 ; CHECK-NEXT: WIDEN ir<%lsd> = load vp<[[VEC_PTR]]>, !dbg /tmp/s.c:6:3
 ; CHECK-NEXT: WIDEN ir<%psd> = add nuw nsw ir<%lsd>, ir<23>, !dbg /tmp/s.c:7:3
 ; CHECK-NEXT: WIDEN ir<%cmp1> = icmp slt ir<%lsd>, ir<100>, !dbg /tmp/s.c:8:3
@@ -375,7 +375,7 @@ define void @recipe_debug_loc_location(ptr nocapture %src) !dbg !5 {
 ; CHECK-EMPTY:
 ; CHECK-NEXT: if.then.0:
 ; CHECK-NEXT: BLEND ir<%ysd.0> = ir<%psd> vp<[[PHI]]>/vp<[[OR1]]>, !dbg /tmp/s.c:14:3
-; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer inbounds ir<%isd>, !dbg /tmp/s.c:15:3
+; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer inbounds ir<%isd>, ir<0>, !dbg /tmp/s.c:15:3
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%ysd.0>, !dbg /tmp/s.c:15:3
 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
@@ -527,7 +527,7 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) {
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
 ; CHECK-NEXT: CLONE ir<%gep> = getelementptr inbounds ir<%ptr>, vp<[[STEPS]]>
 ; CHECK-NEXT: WIDEN ir<%add> = add ir<%iv>, ir<%off>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer inbounds ir<%gep>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer inbounds ir<%gep>, ir<0>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<0>
 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
@@ -591,13 +591,13 @@ define void @print_fast_math_flags(i64 %n, ptr noalias %y, ptr noalias %x, ptr %
 ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%gep.y> = getelementptr inbounds ir<%y>, vp<[[STEPS]]>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer inbounds ir<%gep.y>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer inbounds ir<%gep.y>, ir<0>
 ; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT: WIDEN ir<%add> = fadd nnan ir<%lv>, ir<1.000000e+00>
 ; CHECK-NEXT: WIDEN ir<%mul> = fmul fast ir<%add>, ir<2.000000e+00>
 ; CHECK-NEXT: WIDEN ir<%div> = fdiv reassoc nsz contract ir<%mul>, ir<2.000000e+00>
 ; CHECK-NEXT: CLONE ir<%gep.x> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer inbounds ir<%gep.x>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer inbounds ir<%gep.x>, ir<0>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%div>
 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
@@ -662,12 +662,12 @@ define void @print_exact_flags(i64 %n, ptr noalias %x) {
 ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%gep.x> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer inbounds ir<%gep.x>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer inbounds ir<%gep.x>, ir<0>
 ; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT: WIDEN ir<%div.1> = udiv exact ir<%lv>, ir<20>
 ; CHECK-NEXT: WIDEN ir<%div.2> = udiv ir<%lv>, ir<60>
 ; CHECK-NEXT: WIDEN ir<%add> = add nuw nsw ir<%div.1>, ir<%div.2>
-; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer inbounds ir<%gep.x>
+; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer inbounds ir<%gep.x>, ir<0>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add>
 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
@@ -731,7 +731,7 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) {
 ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%ld.addr> = getelementptr inbounds ir<%src>, vp<[[STEPS]]>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer inbounds ir<%ld.addr>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer inbounds ir<%ld.addr>, ir<0>
 ; CHECK-NEXT: WIDEN ir<%ld.value> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT: WIDEN ir<%ifcond> = fcmp oeq ir<%ld.value>, ir<5.000000e+00>
 ; CHECK-NEXT: Successor(s): pred.call
@@ -757,7 +757,7 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) {
 ; CHECK-NEXT: WIDEN ir<%fadd> = fadd vp<[[PHI1]]>, vp<[[PHI2]]>
 ; CHECK-NEXT: BLEND ir<%st.value> = ir<%ld.value> ir<%fadd>/ir<%ifcond>
 ; CHECK-NEXT: CLONE ir<%st.addr> = getelementptr inbounds ir<%dest>, vp<[[STEPS]]>
-; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer inbounds ir<%st.addr>
+; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer inbounds ir<%st.addr>, ir<0>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%st.value>
 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
@@ -831,12 +831,12 @@ define void @print_disjoint_flags(i64 %n, ptr noalias %x) {
 ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%gep.x> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer inbounds ir<%gep.x>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer inbounds ir<%gep.x>, ir<0>
 ; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT: WIDEN ir<%or.1> = or disjoint ir<%lv>, ir<1>
 ; CHECK-NEXT: WIDEN ir<%or.2> = or ir<%lv>, ir<3>
 ; CHECK-NEXT: WIDEN ir<%add> = add nuw nsw ir<%or.1>, ir<%or.2>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer inbounds ir<%gep.x>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer inbounds ir<%gep.x>, ir<0>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%add>
 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
@@ -900,7 +900,7 @@ define void @zext_nneg(ptr noalias %p, ptr noalias %p1) {
 ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%idx> = getelementptr ir<%p>, vp<[[STEPS]]>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%idx>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%idx>, ir<0>
 ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT: WIDEN-CAST ir<%zext> = zext nneg ir<%l>
 ; CHECK-NEXT: EMIT vp<[[EXT:%.+]]> = extract-last-element ir<%zext>
@@ -947,11 +947,11 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) {
 ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.1> = phi ir<22>, ir<%for.1.next>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%gep.ptr> = getelementptr inbounds ir<%ptr>, vp<[[STEPS]]>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer inbounds ir<%gep.ptr>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer inbounds ir<%gep.ptr>, ir<0>
 ; CHECK-NEXT: WIDEN ir<%for.1.next> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT: EMIT vp<[[FOR1_SPLICE:%.+]]> = first-order splice ir<%for.1>, ir<%for.1.next>
 ; CHECK-NEXT: WIDEN ir<%add> = add vp<[[FOR1_SPLICE]]>, ir<1>
-; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer inbounds ir<%gep.ptr>
+; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer inbounds ir<%gep.ptr>, ir<0>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add>
 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
@@ -1019,16 +1019,16 @@ define void @print_select_with_fastmath_flags(ptr noalias %a, ptr noalias %b, pt
 ; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT_EXIT:%.+]]>
 ; CHECK-NEXT: vp<[[ST:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds nuw ir<%b>, vp<[[ST]]>
-; CHECK-NEXT: vp<[[PTR1:%.+]]> = vector-pointer inbounds nuw ir<[[GEP1]]>
+; CHECK-NEXT: vp<[[PTR1:%.+]]> = vector-pointer inbounds nuw ir<[[GEP1]]>, ir<0>
 ; CHECK-NEXT: WIDEN ir<[[LD1:%.+]]> = load vp<[[PTR1]]>
 ; CHECK-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds nuw ir<%c>, vp<[[ST]]>
-; CHECK-NEXT: vp<[[PTR2:%.+]]> = vector-pointer inbounds nuw ir<[[GEP2]]>
+; CHECK-NEXT: vp<[[PTR2:%.+]]> = vector-pointer inbounds nuw ir<[[GEP2]]>, ir<0>
 ; CHECK-NEXT: WIDEN ir<[[LD2:%.+]]> = load vp<[[PTR2]]>
 ; CHECK-NEXT: WIDEN ir<[[FCMP:%.+]]> = fcmp ogt fast ir<[[LD1]]>, ir<[[LD2]]>
 ; CHECK-NEXT: WIDEN ir<[[FADD:%.+]]> = fadd fast ir<[[LD1]]>, ir<1.000000e+01>
 ; CHECK-NEXT: WIDEN-SELECT ir<[[SELECT:%.+]]> = select fast ir<[[FCMP]]>, ir<[[FADD]]>, ir<[[LD2]]>
 ; CHECK-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds nuw ir<%a>, vp<[[ST]]>
-; CHECK-NEXT: vp<[[PTR3:%.+]]> = vector-pointer inbounds nuw ir<[[GEP3]]>
+; CHECK-NEXT: vp<[[PTR3:%.+]]> = vector-pointer inbounds nuw ir<[[GEP3]]>, ir<0>
 ; CHECK-NEXT: WIDEN store vp<[[PTR3]]>, ir<[[SELECT]]>
 ; CHECK-NEXT: EMIT vp<[[IV_NEXT_EXIT]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>