Skip to content

Commit 4242589

Browse files
authored
[VPlan] Materialize VF and VFxUF using VPInstructions. (#152879)
Materialize VF and VFxUF computation using VPInstruction instead of directly creating IR. This is one of the last few steps needed to model the full vector skeleton in VPlan. This is mostly NFC, although in some cases we remove some unused computations. PR: #152879
1 parent 9d96d01 commit 4242589

15 files changed

+86
-41
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,20 @@ class VPBuilder {
276276
return tryInsertInstruction(new VPPhi(IncomingValues, DL, Name));
277277
}
278278

279+
/// Create a VPValue holding the runtime value of the element count \p EC in
/// type \p Ty. For a fixed \p EC this is the live-in constant
/// EC.getKnownMinValue(); for a scalable \p EC it is a VScale VPInstruction,
/// multiplied by the known minimum value unless that minimum is 1.
VPValue *createElementCount(Type *Ty, ElementCount EC) {
280+
VPlan &Plan = *getInsertBlock()->getPlan();
281+
// Start with the known minimum as a live-in constant; this is already the
// final result when EC is not scalable.
VPValue *RuntimeEC =
282+
Plan.getOrAddLiveIn(ConstantInt::get(Ty, EC.getKnownMinValue()));
283+
if (EC.isScalable()) {
284+
VPValue *VScale = createNaryOp(VPInstruction::VScale, {}, Ty);
285+
// vscale * 1 == vscale, so skip the multiply when the known minimum is 1.
// NOTE(review): {true, false} presumably requests nuw but not nsw on the
// multiply (the updated tests check `mul nuw`) - confirm against
// createOverflowingOp.
RuntimeEC = EC.getKnownMinValue() == 1
286+
? VScale
287+
: createOverflowingOp(Instruction::Mul,
288+
{VScale, RuntimeEC}, {true, false});
289+
}
290+
return RuntimeEC;
291+
}
292+
279293
/// Convert the input value \p Current to the corresponding value of an
280294
/// induction with \p Start and \p Step values, using \p Start + \p Current *
281295
/// \p Step.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7308,6 +7308,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
73087308
VPlanTransforms::materializeVectorTripCount(
73097309
BestVPlan, VectorPH, CM.foldTailByMasking(),
73107310
CM.requiresScalarEpilogue(BestVF.isVector()));
7311+
VPlanTransforms::materializeVFAndVFxUF(BestVPlan, VectorPH, BestVF);
73117312

73127313
// Perform the actual loop transformation.
73137314
VPTransformState State(&TTI, BestVF, LI, DT, ILV.AC, ILV.Builder, &BestVPlan,
@@ -7364,7 +7365,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
73647365
//===------------------------------------------------===//
73657366

73667367
// 2. Copy and widen instructions from the old loop into the new loop.
7367-
BestVPlan.prepareToExecute(State);
73687368
replaceVPBBWithIRVPBB(VectorPH, State.CFG.PrevBB);
73697369

73707370
// Move check blocks to their final position.

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -950,22 +950,6 @@ VPlan::~VPlan() {
950950
delete BackedgeTakenCount;
951951
}
952952

953-
/// (Removed by this commit.) Set the underlying IR values of the VF and VFxUF
/// VPValues by emitting IR directly before the terminator of
/// State.CFG.PrevBB, using the scalar type inferred for the trip count.
void VPlan::prepareToExecute(VPTransformState &State) {
954-
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
955-
Type *TCTy = VPTypeAnalysis(*this).inferScalarType(getTripCount());
956-
// FIXME: Model VF * UF computation completely in VPlan.
957-
unsigned UF = getUF();
958-
// When VF has users, compute the runtime VF once and derive VFxUF from it
// (a multiply by UF is only emitted for UF > 1).
if (VF.getNumUsers()) {
959-
Value *RuntimeVF = getRuntimeVF(Builder, TCTy, State.VF);
960-
VF.setUnderlyingValue(RuntimeVF);
961-
VFxUF.setUnderlyingValue(
962-
UF > 1 ? Builder.CreateMul(RuntimeVF, ConstantInt::get(TCTy, UF))
963-
: RuntimeVF);
964-
} else {
965-
// No users of VF: only VFxUF gets an underlying value, via createStepForVF.
VFxUF.setUnderlyingValue(createStepForVF(Builder, TCTy, State.VF, UF));
966-
}
967-
}
968-
969953
VPIRBasicBlock *VPlan::getExitBlock(BasicBlock *IRBB) const {
970954
auto Iter = find_if(getExitBlocks(), [IRBB](const VPIRBasicBlock *VPIRBB) {
971955
return VPIRBB->getIRBasicBlock() == IRBB;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1021,8 +1021,9 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
10211021
ExtractLane,
10221022
/// Explicit user for the resume phi of the canonical induction in the main
10231023
/// VPlan, used by the epilogue vector loop.
1024-
ResumeForEpilogue
1025-
1024+
ResumeForEpilogue,
1025+
/// Returns the value for vscale.
1026+
VScale,
10261027
};
10271028

10281029
private:
@@ -1170,6 +1171,7 @@ class VPInstructionWithType : public VPInstruction {
11701171
switch (VPI->getOpcode()) {
11711172
case VPInstruction::WideIVStep:
11721173
case VPInstruction::StepVector:
1174+
case VPInstruction::VScale:
11731175
return true;
11741176
default:
11751177
return false;
@@ -3971,9 +3973,6 @@ class VPlan {
39713973
VPBB->setPlan(this);
39723974
}
39733975

3974-
/// Prepare the plan for execution, setting up the required live-in values.
3975-
void prepareToExecute(VPTransformState &State);
3976-
39773976
/// Generate the IR code for this VPlan.
39783977
void execute(VPTransformState *State);
39793978

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -452,6 +452,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
452452

453453
switch (Opcode) {
454454
case VPInstruction::StepVector:
455+
case VPInstruction::VScale:
455456
return 0;
456457
case Instruction::Alloca:
457458
case Instruction::ExtractValue:
@@ -1040,6 +1041,7 @@ bool VPInstruction::isSingleScalar() const {
10401041
case Instruction::PHI:
10411042
case VPInstruction::ExplicitVectorLength:
10421043
case VPInstruction::ResumeForEpilogue:
1044+
case VPInstruction::VScale:
10431045
return true;
10441046
default:
10451047
return isScalarCast();
@@ -1107,6 +1109,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
11071109
case VPInstruction::WidePtrAdd:
11081110
case VPInstruction::StepVector:
11091111
case VPInstruction::ReductionStartVector:
1112+
case VPInstruction::VScale:
11101113
return false;
11111114
default:
11121115
return true;
@@ -1299,6 +1302,12 @@ void VPInstructionWithType::execute(VPTransformState &State) {
12991302
State.set(this, StepVector);
13001303
break;
13011304
}
1305+
case VPInstruction::VScale: {
1306+
Value *VScale = State.Builder.CreateVScale(ResultTy);
1307+
State.set(this, VScale, true);
1308+
break;
1309+
}
1310+
13021311
default:
13031312
llvm_unreachable("opcode not implemented yet");
13041313
}
@@ -1319,6 +1328,9 @@ void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent,
13191328
case VPInstruction::StepVector:
13201329
O << "step-vector " << *ResultTy;
13211330
break;
1331+
case VPInstruction::VScale:
1332+
O << "vscale " << *ResultTy;
1333+
break;
13221334
default:
13231335
assert(Instruction::isCast(getOpcode()) && "unhandled opcode");
13241336
O << Instruction::getOpcodeName(getOpcode()) << " ";

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3336,6 +3336,42 @@ void VPlanTransforms::materializeVectorTripCount(VPlan &Plan,
33363336
VectorTC.replaceAllUsesWith(Res);
33373337
}
33383338

3339+
/// Replace all uses of Plan.getVF() and Plan.getVFxUF() with explicit VPValues
/// computed from \p VFEC and Plan.getUF(). Any recipes needed are created at
/// the beginning of \p VectorPH, in the scalar type inferred for the plan's
/// trip count.
void VPlanTransforms::materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
3340+
ElementCount VFEC) {
3341+
VPBuilder Builder(VectorPH, VectorPH->begin());
3342+
Type *TCTy = VPTypeAnalysis(Plan).inferScalarType(Plan.getTripCount());
3343+
VPValue &VF = Plan.getVF();
3344+
VPValue &VFxUF = Plan.getVFxUF();
3345+
// Note that after the transform, Plan.getVF and Plan.getVFxUF should not be
3346+
// used.
3347+
// TODO: Assert that they aren't used.
3348+
3349+
// If there are no users of the runtime VF, compute VFxUF by constant folding
3350+
// the multiplication of VF and UF.
3351+
if (VF.getNumUsers() == 0) {
3352+
VPValue *RuntimeVFxUF =
3353+
Builder.createElementCount(TCTy, VFEC * Plan.getUF());
3354+
VFxUF.replaceAllUsesWith(RuntimeVFxUF);
3355+
return;
3356+
}
3357+
3358+
// For users of the runtime VF, compute it as VF * vscale, and VFxUF as (VF *
3359+
// vscale) * UF.
3360+
VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
3361+
// Users for which usesScalars(&VF) is false get a Broadcast of the runtime
// VF instead. This must happen before the replaceAllUsesWith below, which
// redirects every remaining use of VF to the scalar RuntimeVF.
if (any_of(VF.users(), [&VF](VPUser *U) { return !U->usesScalars(&VF); })) {
3362+
VPValue *BC = Builder.createNaryOp(VPInstruction::Broadcast, RuntimeVF);
3363+
VF.replaceUsesWithIf(
3364+
BC, [&VF](VPUser &U, unsigned) { return !U.usesScalars(&VF); });
3365+
}
3366+
VF.replaceAllUsesWith(RuntimeVF);
3367+
3368+
// VFxUF reuses RuntimeVF directly when UF is 1; otherwise it is an explicit
// multiply of RuntimeVF by the live-in constant UF.
VPValue *UF = Plan.getOrAddLiveIn(ConstantInt::get(TCTy, Plan.getUF()));
3369+
VPValue *MulByUF = Plan.getUF() == 1 ? RuntimeVF
3370+
: Builder.createNaryOp(Instruction::Mul,
3371+
{RuntimeVF, UF});
3372+
VFxUF.replaceAllUsesWith(MulByUF);
3373+
}
3374+
33393375
/// Returns true if \p V is VPWidenLoadRecipe or VPInterleaveRecipe that can be
33403376
/// converted to a narrower recipe. \p V is used by a wide recipe that feeds a
33413377
/// store interleave group at index \p Idx, \p WideMember0 is the recipe feeding

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,10 @@ struct VPlanTransforms {
276276
static void materializeBackedgeTakenCount(VPlan &Plan,
277277
VPBasicBlock *VectorPH);
278278

279+
/// Materialize VF and VFxUF to be computed explicitly using VPInstructions.
/// New recipes are inserted at the beginning of \p VectorPH; \p VF is the
/// element count used to compute the runtime values.
280+
static void materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
281+
ElementCount VF);
282+
279283
/// Try to convert a plan with interleave groups with VF elements to a plan
280284
/// with the interleave groups replaced by wide loads and stores processing VF
281285
/// elements, if all transformed interleave groups access the full vector

llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,12 @@ define void @induction_i7(ptr %dst) #0 {
1414
; CHECK: vector.ph:
1515
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
1616
; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP4]], 2
17+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP40]], i64 0
18+
; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
1719
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2
1820
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP5]]
1921
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
2022
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i7
21-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP40]], i64 0
22-
; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
2323
; CHECK-NEXT: [[DOTSPLAT:%.*]] = trunc <vscale x 2 x i64> [[DOTSPLAT_]] to <vscale x 2 x i7>
2424
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i8> @llvm.stepvector.nxv2i8()
2525
; CHECK-NEXT: [[TMP7:%.*]] = trunc <vscale x 2 x i8> [[TMP6]] to <vscale x 2 x i7>
@@ -76,12 +76,12 @@ define void @induction_i3_zext(ptr %dst) #0 {
7676
; CHECK: vector.ph:
7777
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
7878
; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP4]], 2
79+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP40]], i64 0
80+
; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
7981
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2
8082
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP5]]
8183
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
8284
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i3
83-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP40]], i64 0
84-
; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
8585
; CHECK-NEXT: [[DOTSPLAT:%.*]] = trunc <vscale x 2 x i64> [[DOTSPLAT_]] to <vscale x 2 x i3>
8686
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i8> @llvm.stepvector.nxv2i8()
8787
; CHECK-NEXT: [[TMP7:%.*]] = trunc <vscale x 2 x i8> [[TMP6]] to <vscale x 2 x i3>

llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
7272
; CHECK-NEXT: No successors
7373
; CHECK-NEXT: }
7474
; CHECK: VPlan 'Final VPlan for VF={8,16},UF={1}' {
75-
; CHECK-NEXT: Live-in ir<[[EP_VFxUF:.+]]> = VF * UF
7675
; CHECK-NEXT: Live-in ir<1024> = original trip-count
7776
; CHECK-EMPTY:
7877
; CHECK-NEXT: ir-bb<entry>:

llvm/test/Transforms/LoopVectorize/scalable-assume.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,8 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no
157157
; CHECK: [[VECTOR_PH]]:
158158
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
159159
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
160+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP5]], i64 0
161+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
160162
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
161163
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
162164
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
@@ -167,8 +169,6 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no
167169
; CHECK: [[VECTOR_BODY]]:
168170
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
169171
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
170-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP5]], i64 0
171-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
172172
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
173173
; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <vscale x 2 x i64> [[VEC_IND]], splat (i64 495616)
174174
; CHECK-NEXT: [[TMP10:%.*]] = icmp ult <vscale x 2 x i64> [[STEP_ADD]], splat (i64 495616)

0 commit comments

Comments
 (0)