Skip to content

Commit 09d5155

Browse files
fhahn authored and krishna2803 committed
[VPlan] Materialize BackedgeTakenCount using VPInstructions.
Explicitly compute the backedge-taken count using VPInstruction. This is needed to model the full skeleton in VPlan. NFC modulo some instruction re-ordering.
1 parent b9800b0 commit 09d5155

File tree

6 files changed

+27
-15
lines changed

6 files changed

+27
-15
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7302,6 +7302,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
73027302

73037303
// Retrieving VectorPH now when it's easier while VPlan still has Regions.
73047304
VPBasicBlock *VectorPH = cast<VPBasicBlock>(BestVPlan.getVectorPreheader());
7305+
73057306
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
73067307
VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType());
73077308
VPlanTransforms::removeBranchOnConst(BestVPlan);
@@ -7317,6 +7318,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
73177318
VPlanTransforms::dissolveLoopRegions(BestVPlan);
73187319
// Canonicalize EVL loops after regions are dissolved.
73197320
VPlanTransforms::canonicalizeEVLLoops(BestVPlan);
7321+
VPlanTransforms::materializeBackedgeTakenCount(BestVPlan, VectorPH);
7322+
73207323
// Perform the actual loop transformation.
73217324
VPTransformState State(&TTI, BestVF, LI, DT, ILV.AC, ILV.Builder, &BestVPlan,
73227325
OrigLoop->getParentLoop(),
@@ -7373,7 +7376,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
73737376

73747377
// 2. Copy and widen instructions from the old loop into the new loop.
73757378
BestVPlan.prepareToExecute(
7376-
ILV.getTripCount(),
73777379
ILV.getOrCreateVectorTripCount(ILV.LoopVectorPreHeader), State);
73787380
replaceVPBBWithIRVPBB(VectorPH, State.CFG.PrevBB);
73797381

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 2 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -951,24 +951,15 @@ VPlan::~VPlan() {
951951
delete BackedgeTakenCount;
952952
}
953953

954-
void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
955-
VPTransformState &State) {
956-
Type *TCTy = TripCountV->getType();
957-
// Check if the backedge taken count is needed, and if so build it.
958-
if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
959-
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
960-
auto *TCMO = Builder.CreateSub(TripCountV, ConstantInt::get(TCTy, 1),
961-
"trip.count.minus.1");
962-
BackedgeTakenCount->setUnderlyingValue(TCMO);
963-
}
964-
954+
void VPlan::prepareToExecute(Value *VectorTripCountV, VPTransformState &State) {
965955
if (!VectorTripCount.getUnderlyingValue())
966956
VectorTripCount.setUnderlyingValue(VectorTripCountV);
967957
else
968958
assert(VectorTripCount.getUnderlyingValue() == VectorTripCountV &&
969959
"VectorTripCount set earlier must much VectorTripCountV");
970960

971961
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
962+
Type *TCTy = VectorTripCountV->getType();
972963
// FIXME: Model VF * UF computation completely in VPlan.
973964
unsigned UF = getUF();
974965
if (VF.getNumUsers()) {

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3958,8 +3958,7 @@ class VPlan {
39583958
}
39593959

39603960
/// Prepare the plan for execution, setting up the required live-in values.
3961-
void prepareToExecute(Value *TripCount, Value *VectorTripCount,
3962-
VPTransformState &State);
3961+
void prepareToExecute(Value *VectorTripCount, VPTransformState &State);
39633962

39643963
/// Generate the IR code for this VPlan.
39653964
void execute(VPTransformState *State);

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3176,6 +3176,21 @@ void VPlanTransforms::materializeVectorTripCount(
31763176
Plan.getVectorTripCount().setUnderlyingValue(NewC->getValue());
31773177
}
31783178

3179+
void VPlanTransforms::materializeBackedgeTakenCount(VPlan &Plan,
3180+
VPBasicBlock *VectorPH) {
3181+
VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
3182+
if (BTC->getNumUsers() == 0)
3183+
return;
3184+
3185+
VPBuilder Builder(VectorPH, VectorPH->begin());
3186+
auto *TCTy = VPTypeAnalysis(Plan).inferScalarType(Plan.getTripCount());
3187+
auto *TCMO = Builder.createNaryOp(
3188+
Instruction::Sub,
3189+
{Plan.getTripCount(), Plan.getOrAddLiveIn(ConstantInt::get(TCTy, 1))},
3190+
DebugLoc::getCompilerGenerated(), "trip.count.minus.1");
3191+
BTC->replaceAllUsesWith(TCMO);
3192+
}
3193+
31793194
/// Returns true if \p V is VPWidenLoadRecipe or VPInterleaveRecipe that can be
31803195
/// converted to a narrower recipe. \p V is used by a wide recipe that feeds a
31813196
/// store interleave group at index \p Idx, \p WideMember0 is the recipe feeding

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -256,6 +256,11 @@ struct VPlanTransforms {
256256
unsigned BestUF,
257257
PredicatedScalarEvolution &PSE);
258258

259+
/// Materialize the backedge-taken count to be computed explicitly using
260+
/// VPInstructions.
261+
static void materializeBackedgeTakenCount(VPlan &Plan,
262+
VPBasicBlock *VectorPH);
263+
259264
/// Try to convert a plan with interleave groups with VF elements to a plan
260265
/// with the interleave groups replaced by wide loads and stores processing VF
261266
/// elements, if all transformed interleave groups access the full vector

llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -100,9 +100,9 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
100100
; DATA_NO_LANEMASK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]]
101101
; DATA_NO_LANEMASK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
102102
; DATA_NO_LANEMASK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
103-
; DATA_NO_LANEMASK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX]], 1
104103
; DATA_NO_LANEMASK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
105104
; DATA_NO_LANEMASK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4
105+
; DATA_NO_LANEMASK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX]], 1
106106
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
107107
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
108108
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0

0 commit comments

Comments
 (0)