@@ -455,10 +455,12 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
455455 VPlanTransforms::removeDeadRecipes (Plan);
456456}
457457
458- // / Create a single-scalar clone of \p RepR for lane \p Lane.
459- static VPReplicateRecipe *cloneForLane (VPlan &Plan, VPBuilder &Builder,
460- Type *IdxTy, VPReplicateRecipe *RepR,
461- VPLane Lane) {
458+ // / Create a single-scalar clone of \p RepR for lane \p Lane. Use \p
459+ // / Def2LaneDefs to look up scalar definitions for operands of \p RepR.
460+ static VPReplicateRecipe *
461+ cloneForLane (VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
462+ VPReplicateRecipe *RepR, VPLane Lane,
463+ const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
462464 // Collect the operands at Lane, creating extracts as needed.
463465 SmallVector<VPValue *> NewOps;
464466 for (VPValue *Op : RepR->operands ()) {
@@ -471,6 +473,14 @@ static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
471473 Builder.createNaryOp (VPInstruction::ExtractLastElement, {Op}));
472474 continue ;
473475 }
476+ // If Op is a definition that has been unrolled, directly use the clone for
477+ // the corresponding lane.
478+ auto LaneDefs = Def2LaneDefs.find (Op);
479+ if (LaneDefs != Def2LaneDefs.end ()) {
480+ NewOps.push_back (LaneDefs->second [Lane.getKnownLane ()]);
481+ continue ;
482+ }
483+
474484 // Look through buildvector to avoid unnecessary extracts.
475485 if (match (Op, m_BuildVector ())) {
476486 NewOps.push_back (
@@ -503,6 +513,13 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
503513 vp_depth_first_shallow (Plan.getVectorLoopRegion ()->getEntry ()));
504514 auto VPBBsToUnroll =
505515 concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion);
516+ // A mapping of current VPValue definitions to collections of new VPValues
517+ // defined per lane. Serves to hook up potential users of current VPValue
518+ // definition that are replicated-per-VF later.
519+ DenseMap<VPValue *, SmallVector<VPValue *>> Def2LaneDefs;
520+ // The removal of current recipes being replaced by new ones needs to be
521+ // delayed until Def2LaneDefs is no longer in use.
522+ SmallVector<VPRecipeBase *> ToRemove;
506523 for (VPBasicBlock *VPBB : VPBBsToUnroll) {
507524 for (VPRecipeBase &R : make_early_inc_range (*VPBB)) {
508525 auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
@@ -514,36 +531,46 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
514531 if (isa<StoreInst>(RepR->getUnderlyingInstr ()) &&
515532 vputils::isSingleScalar (RepR->getOperand (1 ))) {
516533 // Stores to invariant addresses need to store the last lane only.
517- cloneForLane (Plan, Builder, IdxTy, RepR,
518- VPLane::getLastLaneForVF (VF) );
534+ cloneForLane (Plan, Builder, IdxTy, RepR, VPLane::getLastLaneForVF (VF),
535+ Def2LaneDefs );
519536 } else {
520537 // Create single-scalar version of RepR for all lanes.
521538 for (unsigned I = 0 ; I != VF.getKnownMinValue (); ++I)
522- cloneForLane (Plan, Builder, IdxTy, RepR, VPLane (I));
539+ cloneForLane (Plan, Builder, IdxTy, RepR, VPLane (I), Def2LaneDefs );
523540 }
524541 RepR->eraseFromParent ();
525542 continue ;
526543 }
527544 // / Create single-scalar version of RepR for all lanes.
528545 SmallVector<VPValue *> LaneDefs;
529546 for (unsigned I = 0 ; I != VF.getKnownMinValue (); ++I)
530- LaneDefs.push_back (cloneForLane (Plan, Builder, IdxTy, RepR, VPLane (I)));
547+ LaneDefs.push_back (
548+ cloneForLane (Plan, Builder, IdxTy, RepR, VPLane (I), Def2LaneDefs));
531549
550+ Def2LaneDefs[RepR] = LaneDefs;
532551 // / Users that only demand the first lane can use the definition for lane
533552 // / 0.
534553 RepR->replaceUsesWithIf (LaneDefs[0 ], [RepR](VPUser &U, unsigned ) {
535554 return U.onlyFirstLaneUsed (RepR);
536555 });
537556
538- // If needed, create a Build(Struct)Vector recipe to insert the scalar
539- // lane values into a vector.
540- Type *ResTy = RepR->getUnderlyingInstr ()->getType ();
541- VPValue *VecRes = Builder.createNaryOp (
542- ResTy->isStructTy () ? VPInstruction::BuildStructVector
543- : VPInstruction::BuildVector,
544- LaneDefs);
545- RepR->replaceAllUsesWith (VecRes);
546- RepR->eraseFromParent ();
557+ // Update each build vector user that currently has RepR as its only
558+ // operand, to have all LaneDefs as its operands.
559+ for (VPUser *U : to_vector (RepR->users ())) {
560+ auto *VPI = dyn_cast<VPInstruction>(U);
561+ if (!VPI || (VPI->getOpcode () != VPInstruction::BuildVector &&
562+ VPI->getOpcode () != VPInstruction::BuildStructVector))
563+ continue ;
564+ assert (VPI->getNumOperands () == 1 &&
565+ " Build(Struct)Vector must have a single operand before "
566+ " replicating by VF" );
567+ VPI->setOperand (0 , LaneDefs[0 ]);
568+ for (VPValue *LaneDef : drop_begin (LaneDefs))
569+ VPI->addOperand (LaneDef);
570+ }
571+ ToRemove.push_back (RepR);
547572 }
548573 }
574+ for (auto *R : reverse (ToRemove))
575+ R->eraseFromParent ();
549576}
0 commit comments