diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 912c893123095..4856ebebb596f 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -276,6 +276,20 @@ class VPBuilder { return tryInsertInstruction(new VPPhi(IncomingValues, DL, Name)); } + VPValue *createElementCount(Type *Ty, ElementCount EC) { + VPlan &Plan = *getInsertBlock()->getPlan(); + VPValue *RuntimeEC = + Plan.getOrAddLiveIn(ConstantInt::get(Ty, EC.getKnownMinValue())); + if (EC.isScalable()) { + VPValue *VScale = createNaryOp(VPInstruction::VScale, {}, Ty); + RuntimeEC = EC.getKnownMinValue() == 1 + ? VScale + : createOverflowingOp(Instruction::Mul, + {VScale, RuntimeEC}, {true, false}); + } + return RuntimeEC; + } + /// Convert the input value \p Current to the corresponding value of an /// induction with \p Start and \p Step values, using \p Start + \p Current * /// \p Step. diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index ea39ca4a7f9b4..d201fbccbd91d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7308,6 +7308,7 @@ DenseMap LoopVectorizationPlanner::executePlan( VPlanTransforms::materializeVectorTripCount( BestVPlan, VectorPH, CM.foldTailByMasking(), CM.requiresScalarEpilogue(BestVF.isVector())); + VPlanTransforms::materializeVFAndVFxUF(BestVPlan, VectorPH, BestVF); // Perform the actual loop transformation. VPTransformState State(&TTI, BestVF, LI, DT, ILV.AC, ILV.Builder, &BestVPlan, @@ -7364,7 +7365,6 @@ DenseMap LoopVectorizationPlanner::executePlan( //===------------------------------------------------===// // 2. Copy and widen instructions from the old loop into the new loop. - BestVPlan.prepareToExecute(State); replaceVPBBWithIRVPBB(VectorPH, State.CFG.PrevBB); // Move check blocks to their final position. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 53baa58aed95c..3682bd0e17200 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -950,22 +950,6 @@ VPlan::~VPlan() { delete BackedgeTakenCount; } -void VPlan::prepareToExecute(VPTransformState &State) { - IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); - Type *TCTy = VPTypeAnalysis(*this).inferScalarType(getTripCount()); - // FIXME: Model VF * UF computation completely in VPlan. - unsigned UF = getUF(); - if (VF.getNumUsers()) { - Value *RuntimeVF = getRuntimeVF(Builder, TCTy, State.VF); - VF.setUnderlyingValue(RuntimeVF); - VFxUF.setUnderlyingValue( - UF > 1 ? Builder.CreateMul(RuntimeVF, ConstantInt::get(TCTy, UF)) - : RuntimeVF); - } else { - VFxUF.setUnderlyingValue(createStepForVF(Builder, TCTy, State.VF, UF)); - } -} - VPIRBasicBlock *VPlan::getExitBlock(BasicBlock *IRBB) const { auto Iter = find_if(getExitBlocks(), [IRBB](const VPIRBasicBlock *VPIRBB) { return VPIRBB->getIRBasicBlock() == IRBB; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 5c3edee13020f..11a7d8b339ae9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1021,8 +1021,9 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, ExtractLane, /// Explicit user for the resume phi of the canonical induction in the main /// VPlan, used by the epilogue vector loop. - ResumeForEpilogue - + ResumeForEpilogue, + /// Returns the value for vscale. + VScale, }; private: @@ -1170,6 +1171,7 @@ class VPInstructionWithType : public VPInstruction { switch (VPI->getOpcode()) { case VPInstruction::WideIVStep: case VPInstruction::StepVector: + case VPInstruction::VScale: return true; default: return false; @@ -3971,9 +3973,6 @@ class VPlan { VPBB->setPlan(this); } - /// Prepare the plan for execution, setting up the required live-in values. - void prepareToExecute(VPTransformState &State); - /// Generate the IR code for this VPlan. void execute(VPTransformState *State); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index debc4c4b4f5c8..23c10d2b25263 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -452,6 +452,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) { switch (Opcode) { case VPInstruction::StepVector: + case VPInstruction::VScale: return 0; case Instruction::Alloca: case Instruction::ExtractValue: @@ -1040,6 +1041,7 @@ bool VPInstruction::isSingleScalar() const { case Instruction::PHI: case VPInstruction::ExplicitVectorLength: case VPInstruction::ResumeForEpilogue: + case VPInstruction::VScale: return true; default: return isScalarCast(); @@ -1107,6 +1109,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { case VPInstruction::WidePtrAdd: case VPInstruction::StepVector: case VPInstruction::ReductionStartVector: + case VPInstruction::VScale: return false; default: return true; @@ -1299,6 +1302,12 @@ void VPInstructionWithType::execute(VPTransformState &State) { State.set(this, StepVector); break; } + case VPInstruction::VScale: { + Value *VScale = State.Builder.CreateVScale(ResultTy); + State.set(this, VScale, true); + break; + } + default: llvm_unreachable("opcode not implemented yet"); } @@ -1319,6 +1328,9 @@ void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent, case VPInstruction::StepVector: O << "step-vector " << *ResultTy; break; + case VPInstruction::VScale: + O << "vscale " << *ResultTy; + break; default: assert(Instruction::isCast(getOpcode()) && "unhandled opcode"); O << Instruction::getOpcodeName(getOpcode()) << " "; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 2407b06eab6f3..c019311b2faba 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -3336,6 +3336,42 @@ void VPlanTransforms::materializeVectorTripCount(VPlan &Plan, VectorTC.replaceAllUsesWith(Res); } +void VPlanTransforms::materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH, + ElementCount VFEC) { + VPBuilder Builder(VectorPH, VectorPH->begin()); + Type *TCTy = VPTypeAnalysis(Plan).inferScalarType(Plan.getTripCount()); + VPValue &VF = Plan.getVF(); + VPValue &VFxUF = Plan.getVFxUF(); + // Note that after the transform, Plan.getVF and Plan.getVFxUF should not be + // used. + // TODO: Assert that they aren't used. + + // If there are no users of the runtime VF, compute VFxUF by constant folding + // the multiplication of VF and UF. + if (VF.getNumUsers() == 0) { + VPValue *RuntimeVFxUF = + Builder.createElementCount(TCTy, VFEC * Plan.getUF()); + VFxUF.replaceAllUsesWith(RuntimeVFxUF); + return; + } + + // For users of the runtime VF, compute it as VF * vscale, and VFxUF as (VF * + // vscale) * UF. + VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC); + if (any_of(VF.users(), [&VF](VPUser *U) { return !U->usesScalars(&VF); })) { + VPValue *BC = Builder.createNaryOp(VPInstruction::Broadcast, RuntimeVF); + VF.replaceUsesWithIf( + BC, [&VF](VPUser &U, unsigned) { return !U.usesScalars(&VF); }); + } + VF.replaceAllUsesWith(RuntimeVF); + + VPValue *UF = Plan.getOrAddLiveIn(ConstantInt::get(TCTy, Plan.getUF())); + VPValue *MulByUF = Plan.getUF() == 1 ? RuntimeVF + : Builder.createNaryOp(Instruction::Mul, + {RuntimeVF, UF}); + VFxUF.replaceAllUsesWith(MulByUF); +} + /// Returns true if \p V is VPWidenLoadRecipe or VPInterleaveRecipe that can be /// converted to a narrower recipe. \p V is used by a wide recipe that feeds a /// store interleave group at index \p Idx, \p WideMember0 is the recipe feeding diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index f507c39822080..5de1483783a42 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -276,6 +276,10 @@ struct VPlanTransforms { static void materializeBackedgeTakenCount(VPlan &Plan, VPBasicBlock *VectorPH); + /// Materialize VF and VFxUF to be computed explicitly using VPInstructions. + static void materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH, + ElementCount VF); + /// Try to convert a plan with interleave groups with VF elements to a plan /// with the interleave groups replaced by wide loads and stores processing VF /// elements, if all transformed interleave groups access the full vector diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll index 351da8b6145be..dd9d3a1fae223 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll @@ -14,12 +14,12 @@ define void @induction_i7(ptr %dst) #0 { ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP4]], 2 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP40]], i64 0 +; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP5]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i7 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP40]], i64 0 -; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[DOTSPLAT:%.*]] = trunc [[DOTSPLAT_]] to ; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i8() ; CHECK-NEXT: [[TMP7:%.*]] = trunc [[TMP6]] to @@ -76,12 +76,12 @@ define void @induction_i3_zext(ptr %dst) #0 { ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP4]], 2 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP40]], i64 0 +; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP5]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP40]], i64 0 -; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[DOTSPLAT:%.*]] = trunc [[DOTSPLAT_]] to ; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i8() ; CHECK-NEXT: [[TMP7:%.*]] = trunc [[TMP6]] to diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll index 879504c3f8b81..664af3c35b5fd 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll @@ -72,7 +72,6 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) { ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK: VPlan 'Final VPlan for VF={8,16},UF={1}' { -; CHECK-NEXT: Live-in ir<[[EP_VFxUF:.+]]> = VF * UF ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: diff --git a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll index 83541f288140a..e822b9212f063 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll @@ -157,6 +157,8 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] @@ -167,8 +169,6 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp ult [[VEC_IND]], splat (i64 495616) ; CHECK-NEXT: [[TMP10:%.*]] = icmp ult [[STEP_ADD]], splat (i64 495616) diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll index 07402ab7917e2..6cfd955afaae0 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll @@ -674,6 +674,8 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; CHECK-VF4UF2: [[VECTOR_PH]]: ; CHECK-VF4UF2-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-VF4UF2-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 4 +; CHECK-VF4UF2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 +; CHECK-VF4UF2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-VF4UF2-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 2 ; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i32 96, [[TMP6]] ; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i32 96, [[N_MOD_VF]] @@ -686,8 +688,6 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; CHECK-VF4UF2: [[VECTOR_BODY]]: ; CHECK-VF4UF2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4UF2-NEXT: [[STEP_ADD:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT1:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4UF2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 -; CHECK-VF4UF2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-VF4UF2-NEXT: [[VEC_IND_NEXT:%.*]] = add [[STEP_ADD]], [[BROADCAST_SPLAT2]] ; CHECK-VF4UF2-NEXT: [[TMP9:%.*]] = add [[VEC_IND_NEXT]], [[BROADCAST_SPLAT]] ; CHECK-VF4UF2-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP6]] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll index ce4592c6491b7..359132f7904cc 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll @@ -17,6 +17,8 @@ define void @add_ind64_unrolled(ptr noalias nocapture %a, ptr noalias nocapture ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP4]], 1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP5]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] @@ -25,8 +27,6 @@ define void @add_ind64_unrolled(ptr noalias nocapture %a, ptr noalias nocapture ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() @@ -95,6 +95,8 @@ define void @add_ind64_unrolled_nxv1i64(ptr noalias nocapture %a, ptr noalias no ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP2]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] @@ -103,8 +105,6 @@ define void @add_ind64_unrolled_nxv1i64(ptr noalias nocapture %a, ptr noalias no ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP2]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll index 0467a583ab79b..e251c2a853b9a 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll @@ -14,6 +14,8 @@ define i32 @iv_live_out_wide(ptr %dst) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 2 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 2000, [[TMP6]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 2000, [[N_MOD_VF]] @@ -26,8 +28,6 @@ define i32 @iv_live_out_wide(ptr %dst) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll index 49d87a209da96..32aadcfb164e6 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll @@ -3,8 +3,6 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; CHECK: VPlan 'Final VPlan for VF={2},UF={1}' { -; CHECK-NEXT: Live-in ir<[[VF:.+]]> = VF -; CHECK-NEXT: Live-in ir<[[VFxUF:.+]]> = VF * UF ; CHECK-NEXT: ir<%0> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -76,7 +74,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; CHECK-NEXT: Successor(s): default.2 ; CHECK-EMPTY: ; CHECK-NEXT: default.2: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, ir<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, ir<2> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: Successor(s): middle.block, vector.body ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll index d200359f45a95..c65d9368d595f 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll @@ -62,7 +62,6 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; ; CHECK: Executing best plan with VF=8, UF=2 ; CHECK-NEXT: VPlan 'Final VPlan for VF={8},UF={2}' { -; CHECK-NEXT: Live-in ir<16> = VF * UF ; CHECK-NEXT: Live-in ir<%and> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: