@@ -464,10 +464,12 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
464464 VPlanTransforms::removeDeadRecipes (Plan);
465465}
466466
467- // / Create a single-scalar clone of \p RepR for lane \p Lane.
468- static VPReplicateRecipe *cloneForLane (VPlan &Plan, VPBuilder &Builder,
469- Type *IdxTy, VPReplicateRecipe *RepR,
470- VPLane Lane) {
467+ // / Create a single-scalar clone of \p RepR for lane \p Lane. Use \p
468+ // / Def2LaneDefs to look up scalar definitions for operands of \p RepR.
469+ static VPReplicateRecipe *
470+ cloneForLane (VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
471+ VPReplicateRecipe *RepR, VPLane Lane,
472+ const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
471473 // Collect the operands at Lane, creating extracts as needed.
472474 SmallVector<VPValue *> NewOps;
473475 for (VPValue *Op : RepR->operands ()) {
@@ -480,6 +482,14 @@ static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
480482 Builder.createNaryOp (VPInstruction::ExtractLastElement, {Op}));
481483 continue ;
482484 }
485+ // If Op is a definition that has been unrolled, directly use the clone for
486+ // the corresponding lane.
487+ auto LaneDefs = Def2LaneDefs.find (Op);
488+ if (LaneDefs != Def2LaneDefs.end ()) {
489+ NewOps.push_back (LaneDefs->second [Lane.getKnownLane ()]);
490+ continue ;
491+ }
492+
483493 // Look through buildvector to avoid unnecessary extracts.
484494 if (match (Op, m_BuildVector ())) {
485495 NewOps.push_back (
@@ -512,6 +522,13 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
512522 vp_depth_first_shallow (Plan.getVectorLoopRegion ()->getEntry ()));
513523 auto VPBBsToUnroll =
514524 concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion);
525+ // A mapping of current VPValue definitions to collections of new VPValues
526+ // defined per lane. Serves to hook up potential users of the current VPValue
527+ // definition that are replicated per VF later.
528+ DenseMap<VPValue *, SmallVector<VPValue *>> Def2LaneDefs;
529+ // The removal of current recipes being replaced by new ones needs to be
530+ // delayed until Def2LaneDefs is no longer in use.
531+ SmallVector<VPRecipeBase *> ToRemove;
515532 for (VPBasicBlock *VPBB : VPBBsToUnroll) {
516533 for (VPRecipeBase &R : make_early_inc_range (*VPBB)) {
517534 auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
@@ -523,36 +540,46 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
523540 if (isa<StoreInst>(RepR->getUnderlyingInstr ()) &&
524541 vputils::isSingleScalar (RepR->getOperand (1 ))) {
525542 // Stores to invariant addresses need to store the last lane only.
526- cloneForLane (Plan, Builder, IdxTy, RepR,
527- VPLane::getLastLaneForVF (VF) );
543+ cloneForLane (Plan, Builder, IdxTy, RepR, VPLane::getLastLaneForVF (VF),
544+ Def2LaneDefs );
528545 } else {
529546 // Create single-scalar version of RepR for all lanes.
530547 for (unsigned I = 0 ; I != VF.getKnownMinValue (); ++I)
531- cloneForLane (Plan, Builder, IdxTy, RepR, VPLane (I));
548+ cloneForLane (Plan, Builder, IdxTy, RepR, VPLane (I), Def2LaneDefs );
532549 }
533550 RepR->eraseFromParent ();
534551 continue ;
535552 }
536553 // / Create single-scalar version of RepR for all lanes.
537554 SmallVector<VPValue *> LaneDefs;
538555 for (unsigned I = 0 ; I != VF.getKnownMinValue (); ++I)
539- LaneDefs.push_back (cloneForLane (Plan, Builder, IdxTy, RepR, VPLane (I)));
556+ LaneDefs.push_back (
557+ cloneForLane (Plan, Builder, IdxTy, RepR, VPLane (I), Def2LaneDefs));
540558
559+ Def2LaneDefs[RepR] = LaneDefs;
541560 // / Users that only demand the first lane can use the definition for lane
542561 // / 0.
543562 RepR->replaceUsesWithIf (LaneDefs[0 ], [RepR](VPUser &U, unsigned ) {
544563 return U.onlyFirstLaneUsed (RepR);
545564 });
546565
547- // If needed, create a Build(Struct)Vector recipe to insert the scalar
548- // lane values into a vector.
549- Type *ResTy = RepR->getUnderlyingInstr ()->getType ();
550- VPValue *VecRes = Builder.createNaryOp (
551- ResTy->isStructTy () ? VPInstruction::BuildStructVector
552- : VPInstruction::BuildVector,
553- LaneDefs);
554- RepR->replaceAllUsesWith (VecRes);
555- RepR->eraseFromParent ();
566+ // Update each build vector user that currently has RepR as its only
567+ // operand, to have all LaneDefs as its operands.
568+ for (VPUser *U : to_vector (RepR->users ())) {
569+ auto *VPI = dyn_cast<VPInstruction>(U);
570+ if (!VPI || (VPI->getOpcode () != VPInstruction::BuildVector &&
571+ VPI->getOpcode () != VPInstruction::BuildStructVector))
572+ continue ;
573+ assert (VPI->getNumOperands () == 1 &&
574+ " Build(Struct)Vector must have a single operand before "
575+ " replicating by VF" );
576+ VPI->setOperand (0 , LaneDefs[0 ]);
577+ for (VPValue *LaneDef : drop_begin (LaneDefs))
578+ VPI->addOperand (LaneDef);
579+ }
580+ ToRemove.push_back (RepR);
556581 }
557582 }
583+ for (auto *R : reverse (ToRemove))
584+ R->eraseFromParent ();
558585}
0 commit comments