Skip to content

Commit 8ad1800

Browse files
committed
!fixup address comments, thnaks
1 parent 0d7aeda commit 8ad1800

File tree

5 files changed

+46
-40
lines changed

5 files changed

+46
-40
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7267,8 +7267,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
72677267
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF,
72687268
OrigLoop->getHeader()->getContext());
72697269
VPlanTransforms::runPass(VPlanTransforms::materializeBuildVectors, BestVPlan);
7270-
VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
72717270
VPlanTransforms::runPass(VPlanTransforms::materializeBroadcasts, BestVPlan);
7271+
VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
72727272
bool HasBranchWeights =
72737273
hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator());
72747274
if (HasBranchWeights) {

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -372,8 +372,9 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
372372
set(Def, VectorValue);
373373
} else {
374374
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
375-
assert(isa<VPInstruction>(Def) && "Explicit BuildVector recipes must "
376-
"handle packing for non-VPInstructions.");
375+
assert(isa<VPInstruction>(Def) &&
376+
"Explicit BuildVector recipes must have"
377+
"handled packing for non-VPInstructions.");
377378
// Initialize packing with insertelements to start from poison.
378379
VectorValue = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
379380
for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane)

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3289,38 +3289,37 @@ void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
32893289
auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
32903290
vp_depth_first_shallow(LoopRegion->getEntry()));
32913291
// Materialize Build(Struct)Vector for all replicating VPReplicateRecipes,
3292-
// excluding ones in replicate regions. Those are not unrolled explicitly yet.
3292+
// excluding ones in replicate regions. Those are not materialized explicitly
3293+
// yet. Those vector users are still handled in VPReplicateRegion::execute(),
3294+
// via shouldPack().
3295+
// TODO: materialize build vectors for replicating recipes in replicating
3296+
// regions.
3297+
// TODO: materialize build vectors for VPInstructions.
32933298
for (VPBasicBlock *VPBB :
32943299
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
32953300
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
32963301
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
3297-
if (!RepR || RepR->isSingleScalar())
3298-
continue;
3299-
VPInstruction *BuildVector = nullptr;
3300-
for (VPUser *U : to_vector(RepR->users())) {
3302+
auto UsesVectorOrInsideReplicateRegions = [RepR, LoopRegion](VPUser *U) {
33013303
VPRegionBlock *ParentRegion =
33023304
cast<VPRecipeBase>(U)->getParent()->getParent();
3303-
if (U->usesScalars(RepR) && ParentRegion == LoopRegion)
3304-
continue;
3305-
3306-
if (!BuildVector) {
3307-
Type *ScalarTy = TypeInfo.inferScalarType(RepR);
3308-
unsigned Opc = ScalarTy->isStructTy()
3309-
? VPInstruction::BuildStructVector
3310-
: VPInstruction::BuildVector;
3311-
BuildVector = new VPInstruction(Opc, {RepR});
3312-
BuildVector->insertAfter(RepR);
3313-
}
3305+
return !U->usesScalars(RepR) || ParentRegion != LoopRegion;
3306+
};
3307+
if (!RepR || RepR->isSingleScalar() ||
3308+
none_of(RepR->users(), UsesVectorOrInsideReplicateRegions))
3309+
continue;
33143310

3315-
// Only update a single operand per users, as the same user is added
3316-
// multiple times, once per use.
3317-
// TODO: Introduce de-duplicating iterator over users.
3318-
for (unsigned Idx = 0; Idx != U->getNumOperands(); ++Idx)
3319-
if (U->getOperand(Idx) == RepR) {
3320-
U->setOperand(Idx, BuildVector);
3321-
break;
3322-
}
3323-
}
3311+
Type *ScalarTy = TypeInfo.inferScalarType(RepR);
3312+
unsigned Opcode = ScalarTy->isStructTy()
3313+
? VPInstruction::BuildStructVector
3314+
: VPInstruction::BuildVector;
3315+
auto *BuildVector = new VPInstruction(Opcode, {RepR});
3316+
BuildVector->insertAfter(RepR);
3317+
3318+
RepR->replaceUsesWithIf(
3319+
BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegions](
3320+
VPUser &U, unsigned) {
3321+
return &U != BuildVector && UsesVectorOrInsideReplicateRegions(&U);
3322+
});
33243323
}
33253324
}
33263325
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,8 +276,8 @@ struct VPlanTransforms {
276276
static void materializeBackedgeTakenCount(VPlan &Plan,
277277
VPBasicBlock *VectorPH);
278278

279-
/// Add explicit Build[Struct]Vector recipes that combine scalar values
280-
/// produced by VPReplicateRecipes to a single vector.
279+
/// Add explicit Build[Struct]Vector recipes that combine multiple scalar
280+
/// values into single vectors.
281281
static void materializeBuildVectors(VPlan &Plan);
282282

283283
/// Try to convert a plan with interleave groups with VF elements to a plan

llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -465,11 +465,12 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
465465
VPlanTransforms::removeDeadRecipes(Plan);
466466
}
467467

468-
/// Create a single-scalar clone of \p RepR for lane \p Lane.
468+
/// Create a single-scalar clone of \p RepR for lane \p Lane. Use \p
469+
/// Def2LaneDefs to look up scalar definitions for operands of \RepR.
469470
static VPReplicateRecipe *
470471
cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
471472
VPReplicateRecipe *RepR, VPLane Lane,
472-
DenseMap<VPValue *, SmallVector<VPValue *>> &Value2Lanes) {
473+
const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
473474
// Collect the operands at Lane, creating extracts as needed.
474475
SmallVector<VPValue *> NewOps;
475476
for (VPValue *Op : RepR->operands()) {
@@ -482,8 +483,11 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
482483
Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op}));
483484
continue;
484485
}
485-
if (Value2Lanes.contains(Op)) {
486-
NewOps.push_back(Value2Lanes[Op][Lane.getKnownLane()]);
486+
// If Op is a definition that has been unrolled, directly use the clone for
487+
// the corresponding lane.
488+
auto LaneDefs = Def2LaneDefs.find(Op);
489+
if (LaneDefs != Def2LaneDefs.end()) {
490+
NewOps.push_back(LaneDefs->second[Lane.getKnownLane()]);
487491
continue;
488492
}
489493

@@ -519,7 +523,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
519523
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()));
520524
auto VPBBsToUnroll =
521525
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion);
522-
DenseMap<VPValue *, SmallVector<VPValue *>> Value2Lanes;
526+
DenseMap<VPValue *, SmallVector<VPValue *>> Def2LaneDefs;
523527
SmallVector<VPRecipeBase *> ToRemove;
524528
for (VPBasicBlock *VPBB : VPBBsToUnroll) {
525529
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
@@ -533,11 +537,11 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
533537
vputils::isSingleScalar(RepR->getOperand(1))) {
534538
// Stores to invariant addresses need to store the last lane only.
535539
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane::getLastLaneForVF(VF),
536-
Value2Lanes);
540+
Def2LaneDefs);
537541
} else {
538542
// Create single-scalar version of RepR for all lanes.
539543
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
540-
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Value2Lanes);
544+
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs);
541545
}
542546
RepR->eraseFromParent();
543547
continue;
@@ -546,22 +550,24 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
546550
SmallVector<VPValue *> LaneDefs;
547551
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
548552
LaneDefs.push_back(
549-
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Value2Lanes));
553+
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs));
550554

551-
Value2Lanes[RepR] = LaneDefs;
555+
Def2LaneDefs[RepR] = LaneDefs;
552556
/// Users that only demand the first lane can use the definition for lane
553557
/// 0.
554558
RepR->replaceUsesWithIf(LaneDefs[0], [RepR](VPUser &U, unsigned) {
555559
return U.onlyFirstLaneUsed(RepR);
556560
});
557561

562+
// Update each build vector user that currently has RepR as its only
563+
// operand, to have all LaneDefs as its operands.
558564
for (VPUser *U : to_vector(RepR->users())) {
559565
auto *VPI = dyn_cast<VPInstruction>(U);
560566
if (!VPI || (VPI->getOpcode() != VPInstruction::BuildVector &&
561567
VPI->getOpcode() != VPInstruction::BuildStructVector))
562568
continue;
563569
assert(VPI->getNumOperands() == 1 &&
564-
"Build(Struct)Vector must have a single operand");
570+
"Build(Struct)Vector must have a single operand before replicating by VF"");
565571
VPI->setOperand(0, LaneDefs[0]);
566572
for (VPValue *Def : drop_begin(LaneDefs))
567573
VPI->addOperand(Def);

0 commit comments

Comments
 (0)