@@ -465,11 +465,12 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
465465 VPlanTransforms::removeDeadRecipes (Plan);
466466}
467467
468- // / Create a single-scalar clone of \p RepR for lane \p Lane.
468+ // / Create a single-scalar clone of \p RepR for lane \p Lane. Use \p
469+ // / Def2LaneDefs to look up scalar definitions for operands of \p RepR.
469470static VPReplicateRecipe *
470471cloneForLane (VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
471472 VPReplicateRecipe *RepR, VPLane Lane,
472- DenseMap<VPValue *, SmallVector<VPValue *>> &Value2Lanes ) {
473+ const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs ) {
473474 // Collect the operands at Lane, creating extracts as needed.
474475 SmallVector<VPValue *> NewOps;
475476 for (VPValue *Op : RepR->operands ()) {
@@ -482,8 +483,11 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
482483 Builder.createNaryOp (VPInstruction::ExtractLastElement, {Op}));
483484 continue ;
484485 }
485- if (Value2Lanes.contains (Op)) {
486- NewOps.push_back (Value2Lanes[Op][Lane.getKnownLane ()]);
486+ // If Op is a definition that has been unrolled, directly use the clone for
487+ // the corresponding lane.
488+ auto LaneDefs = Def2LaneDefs.find (Op);
489+ if (LaneDefs != Def2LaneDefs.end ()) {
490+ NewOps.push_back (LaneDefs->second [Lane.getKnownLane ()]);
487491 continue ;
488492 }
489493
@@ -519,7 +523,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
519523 vp_depth_first_shallow (Plan.getVectorLoopRegion ()->getEntry ()));
520524 auto VPBBsToUnroll =
521525 concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion);
522- DenseMap<VPValue *, SmallVector<VPValue *>> Value2Lanes ;
526+ DenseMap<VPValue *, SmallVector<VPValue *>> Def2LaneDefs ;
523527 SmallVector<VPRecipeBase *> ToRemove;
524528 for (VPBasicBlock *VPBB : VPBBsToUnroll) {
525529 for (VPRecipeBase &R : make_early_inc_range (*VPBB)) {
@@ -533,11 +537,11 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
533537 vputils::isSingleScalar (RepR->getOperand (1 ))) {
534538 // Stores to invariant addresses need to store the last lane only.
535539 cloneForLane (Plan, Builder, IdxTy, RepR, VPLane::getLastLaneForVF (VF),
536- Value2Lanes );
540+ Def2LaneDefs );
537541 } else {
538542 // Create single-scalar version of RepR for all lanes.
539543 for (unsigned I = 0 ; I != VF.getKnownMinValue (); ++I)
540- cloneForLane (Plan, Builder, IdxTy, RepR, VPLane (I), Value2Lanes );
544+ cloneForLane (Plan, Builder, IdxTy, RepR, VPLane (I), Def2LaneDefs );
541545 }
542546 RepR->eraseFromParent ();
543547 continue ;
@@ -546,22 +550,24 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
546550 SmallVector<VPValue *> LaneDefs;
547551 for (unsigned I = 0 ; I != VF.getKnownMinValue (); ++I)
548552 LaneDefs.push_back (
549- cloneForLane (Plan, Builder, IdxTy, RepR, VPLane (I), Value2Lanes ));
553+ cloneForLane (Plan, Builder, IdxTy, RepR, VPLane (I), Def2LaneDefs ));
550554
551- Value2Lanes [RepR] = LaneDefs;
555+ Def2LaneDefs [RepR] = LaneDefs;
552556 // / Users that only demand the first lane can use the definition for lane
553557 // / 0.
554558 RepR->replaceUsesWithIf (LaneDefs[0 ], [RepR](VPUser &U, unsigned ) {
555559 return U.onlyFirstLaneUsed (RepR);
556560 });
557561
562+ // Update each build vector user that currently has RepR as its only
563+ // operand, to have all LaneDefs as its operands.
558564 for (VPUser *U : to_vector (RepR->users ())) {
559565 auto *VPI = dyn_cast<VPInstruction>(U);
560566 if (!VPI || (VPI->getOpcode () != VPInstruction::BuildVector &&
561567 VPI->getOpcode () != VPInstruction::BuildStructVector))
562568 continue ;
563569 assert (VPI->getNumOperands () == 1 &&
564- " Build(Struct)Vector must have a single operand" );
570+ " Build(Struct)Vector must have a single operand before replicating by VF " " );
565571 VPI->setOperand(0, LaneDefs[0]);
566572 for (VPValue *Def : drop_begin(LaneDefs))
567573 VPI->addOperand(Def);
0 commit comments