Skip to content

Commit b7a1ae9

Browse files
committed
[VPlan] Materialize Build(Struct)Vectors for VPReplicateRecipes. (NFCI) (llvm#151487)
Materialize Build(Struct)Vectors explicitly for VPReplicateRecipes, to serve their users requiring a vector, instead of doing so when unrolling by VF. Now we only need to implicitly build vectors in VPTransformState::get for VPInstructions. Once they are also unrolled by VF we can remove the code-path altogether. PR: llvm#151487 (cherry picked from commit 7e99893)
1 parent fa3ae38 commit b7a1ae9

File tree

6 files changed

+101
-18
lines changed

6 files changed

+101
-18
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7273,8 +7273,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
72737273
// cost model is complete for better cost estimates.
72747274
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF,
72757275
OrigLoop->getHeader()->getContext());
7276-
VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
7276+
VPlanTransforms::runPass(VPlanTransforms::materializeBuildVectors, BestVPlan);
72777277
VPlanTransforms::runPass(VPlanTransforms::materializeBroadcasts, BestVPlan);
7278+
VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
72787279
bool HasBranchWeights =
72797280
hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator());
72807281
if (HasBranchWeights) {

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,9 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
372372
set(Def, VectorValue);
373373
} else {
374374
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
375+
assert(isa<VPInstruction>(Def) &&
376+
"Explicit BuildVector recipes must have"
377+
"handled packing for non-VPInstructions.");
375378
// Initialize packing with insertelements to start from poison.
376379
VectorValue = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
377380
for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane)

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,8 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
465465
case Instruction::Load:
466466
case VPInstruction::AnyOf:
467467
case VPInstruction::BranchOnCond:
468+
case VPInstruction::BuildStructVector:
469+
case VPInstruction::BuildVector:
468470
case VPInstruction::CalculateTripCountMinusVF:
469471
case VPInstruction::CanonicalIVIncrementForPart:
470472
case VPInstruction::ExplicitVectorLength:

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3417,6 +3417,52 @@ void VPlanTransforms::sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE,
34173417
}
34183418
}
34193419

3420+
// Insert an explicit Build(Struct)Vector VPInstruction after each replicating
// VPReplicateRecipe that has at least one user selected by the
// UsesVectorOrInsideReplicateRegion predicate below, and redirect exactly
// those users to the new instruction. Plans that only have a scalar VF are
// left untouched.
void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
3421+
if (Plan.hasScalarVFOnly())
3422+
return;
3423+
3424+
VPTypeAnalysis TypeInfo(Plan);
3425+
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
// Two sets of VPBasicBlocks are visited: those reached by a shallow
// depth-first walk from the plan entry, and those reached by a shallow walk
// from the loop region's entry. NOTE(review): "shallow" presumably means the
// walk does not descend into nested regions, which is how recipes inside
// replicate regions are excluded -- confirm against vp_depth_first_shallow.
3426+
auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
3427+
vp_depth_first_shallow(Plan.getEntry()));
3428+
auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
3429+
vp_depth_first_shallow(LoopRegion->getEntry()));
3430+
// Materialize Build(Struct)Vector for all replicating VPReplicateRecipes,
3431+
// excluding ones in replicate regions. Those are not materialized explicitly
3432+
// yet. Those vector users are still handled in VPReplicateRegion::execute(),
3433+
// via shouldPack().
3434+
// TODO: materialize build vectors for replicating recipes in replicating
3435+
// regions.
3436+
// TODO: materialize build vectors for VPInstructions.
3437+
for (VPBasicBlock *VPBB :
3438+
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
3439+
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
// RepR is null when R is not a VPReplicateRecipe. The predicate below
// captures it by value but is only invoked after the null check in the
// following `if` has passed.
3440+
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
// Returns true if user U does not use the scalars of RepR (per
// usesScalars), or if the region enclosing U's block is not LoopRegion.
// NOTE(review): for a user whose block has no enclosing region,
// getParent()->getParent() is presumably null, which also compares unequal
// to LoopRegion, so such users are selected too -- confirm this is intended.
3441+
auto UsesVectorOrInsideReplicateRegion = [RepR, LoopRegion](VPUser *U) {
3442+
VPRegionBlock *ParentRegion =
3443+
cast<VPRecipeBase>(U)->getParent()->getParent();
3444+
return !U->usesScalars(RepR) || ParentRegion != LoopRegion;
3445+
};
// Nothing to do for non-replicate recipes, single-scalar replicate
// recipes, or replicate recipes none of whose users match the predicate.
3446+
if (!RepR || RepR->isSingleScalar() ||
3447+
none_of(RepR->users(), UsesVectorOrInsideReplicateRegion))
3448+
continue;
3449+
3450+
// Pick the opcode from the inferred scalar type: struct-typed results use
// BuildStructVector, everything else uses BuildVector.
Type *ScalarTy = TypeInfo.inferScalarType(RepR);
3451+
unsigned Opcode = ScalarTy->isStructTy()
3452+
? VPInstruction::BuildStructVector
3453+
: VPInstruction::BuildVector;
// The new instruction is created with RepR as its single operand and
// placed directly after RepR.
3454+
auto *BuildVector = new VPInstruction(Opcode, {RepR});
3455+
BuildVector->insertAfter(RepR);
3456+
3457+
// Redirect only the users selected by the predicate. BuildVector itself is
// excluded so that it keeps RepR as its operand.
RepR->replaceUsesWithIf(
3458+
BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
3459+
VPUser &U, unsigned) {
3460+
return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
3461+
});
3462+
}
3463+
}
3464+
}
3465+
34203466
/// Returns true if \p V is VPWidenLoadRecipe or VPInterleaveRecipe that can be
34213467
/// converted to a narrower recipe. \p V is used by a wide recipe that feeds a
34223468
/// store interleave group at index \p Idx, \p WideMember0 is the recipe feeding

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,10 @@ struct VPlanTransforms {
248248
static void sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE,
249249
const Loop *L);
250250

251+
/// Add explicit Build[Struct]Vector recipes that combine multiple scalar
252+
/// values into single vectors.
253+
static void materializeBuildVectors(VPlan &Plan);
254+
251255
/// Try to convert a plan with interleave groups with VF elements to a plan
252256
/// with the interleave groups replaced by wide loads and stores processing VF
253257
/// elements, if all transformed interleave groups access the full vector

llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp

Lines changed: 44 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -455,10 +455,12 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
455455
VPlanTransforms::removeDeadRecipes(Plan);
456456
}
457457

458-
/// Create a single-scalar clone of \p RepR for lane \p Lane.
459-
static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
460-
Type *IdxTy, VPReplicateRecipe *RepR,
461-
VPLane Lane) {
458+
/// Create a single-scalar clone of \p RepR for lane \p Lane. Use \p
459+
/// Def2LaneDefs to look up scalar definitions for operands of \p RepR.
460+
static VPReplicateRecipe *
461+
cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
462+
VPReplicateRecipe *RepR, VPLane Lane,
463+
const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
462464
// Collect the operands at Lane, creating extracts as needed.
463465
SmallVector<VPValue *> NewOps;
464466
for (VPValue *Op : RepR->operands()) {
@@ -471,6 +473,14 @@ static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
471473
Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op}));
472474
continue;
473475
}
476+
// If Op is a definition that has been unrolled, directly use the clone for
477+
// the corresponding lane.
478+
auto LaneDefs = Def2LaneDefs.find(Op);
479+
if (LaneDefs != Def2LaneDefs.end()) {
480+
NewOps.push_back(LaneDefs->second[Lane.getKnownLane()]);
481+
continue;
482+
}
483+
474484
// Look through buildvector to avoid unnecessary extracts.
475485
if (match(Op, m_BuildVector())) {
476486
NewOps.push_back(
@@ -503,6 +513,13 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
503513
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()));
504514
auto VPBBsToUnroll =
505515
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion);
516+
// A mapping of current VPValue definitions to collections of new VPValues
517+
// defined per lane. Serves to hook-up potential users of current VPValue
518+
// definition that are replicated-per-VF later.
519+
DenseMap<VPValue *, SmallVector<VPValue *>> Def2LaneDefs;
520+
// The removal of current recipes being replaced by new ones needs to be
521+
// delayed after Def2LaneDefs is no longer in use.
522+
SmallVector<VPRecipeBase *> ToRemove;
506523
for (VPBasicBlock *VPBB : VPBBsToUnroll) {
507524
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
508525
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
@@ -514,36 +531,46 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
514531
if (isa<StoreInst>(RepR->getUnderlyingInstr()) &&
515532
vputils::isSingleScalar(RepR->getOperand(1))) {
516533
// Stores to invariant addresses need to store the last lane only.
517-
cloneForLane(Plan, Builder, IdxTy, RepR,
518-
VPLane::getLastLaneForVF(VF));
534+
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane::getLastLaneForVF(VF),
535+
Def2LaneDefs);
519536
} else {
520537
// Create single-scalar version of RepR for all lanes.
521538
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
522-
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I));
539+
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs);
523540
}
524541
RepR->eraseFromParent();
525542
continue;
526543
}
527544
/// Create single-scalar version of RepR for all lanes.
528545
SmallVector<VPValue *> LaneDefs;
529546
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
530-
LaneDefs.push_back(cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I)));
547+
LaneDefs.push_back(
548+
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs));
531549

550+
Def2LaneDefs[RepR] = LaneDefs;
532551
/// Users that only demand the first lane can use the definition for lane
533552
/// 0.
534553
RepR->replaceUsesWithIf(LaneDefs[0], [RepR](VPUser &U, unsigned) {
535554
return U.onlyFirstLaneUsed(RepR);
536555
});
537556

538-
// If needed, create a Build(Struct)Vector recipe to insert the scalar
539-
// lane values into a vector.
540-
Type *ResTy = RepR->getUnderlyingInstr()->getType();
541-
VPValue *VecRes = Builder.createNaryOp(
542-
ResTy->isStructTy() ? VPInstruction::BuildStructVector
543-
: VPInstruction::BuildVector,
544-
LaneDefs);
545-
RepR->replaceAllUsesWith(VecRes);
546-
RepR->eraseFromParent();
557+
// Update each build vector user that currently has RepR as its only
558+
// operand, to have all LaneDefs as its operands.
559+
for (VPUser *U : to_vector(RepR->users())) {
560+
auto *VPI = dyn_cast<VPInstruction>(U);
561+
if (!VPI || (VPI->getOpcode() != VPInstruction::BuildVector &&
562+
VPI->getOpcode() != VPInstruction::BuildStructVector))
563+
continue;
564+
assert(VPI->getNumOperands() == 1 &&
565+
"Build(Struct)Vector must have a single operand before "
566+
"replicating by VF");
567+
VPI->setOperand(0, LaneDefs[0]);
568+
for (VPValue *LaneDef : drop_begin(LaneDefs))
569+
VPI->addOperand(LaneDef);
570+
}
571+
ToRemove.push_back(RepR);
547572
}
548573
}
574+
for (auto *R : reverse(ToRemove))
575+
R->eraseFromParent();
549576
}

0 commit comments

Comments
 (0)