@@ -463,15 +463,15 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
463463 VPlanTransforms::removeDeadRecipes (Plan);
464464}
465465
466- // / Create a single-scalar clone of \p RepR for lane \p Lane. Use \p
467- // / Def2LaneDefs to look up scalar definitions for operands of \RepR .
468- static VPReplicateRecipe *
466+ // / Create a single-scalar clone of \p DefR for lane \p Lane. Use \p
467+ // / Def2LaneDefs to look up scalar definitions for operands of \p DefR.
468+ static VPRecipeWithIRFlags *
469469cloneForLane (VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
470- VPReplicateRecipe *RepR , VPLane Lane,
470+ VPRecipeWithIRFlags *DefR , VPLane Lane,
471471 const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
472472 // Collect the operands at Lane, creating extracts as needed.
473473 SmallVector<VPValue *> NewOps;
474- for (VPValue *Op : RepR ->operands ()) {
474+ for (VPValue *Op : DefR ->operands ()) {
475475 // If Op is a definition that has been unrolled, directly use the clone for
476476 // the corresponding lane.
477477 auto LaneDefs = Def2LaneDefs.find (Op);
@@ -501,11 +501,19 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
501501 NewOps.push_back (Ext);
502502 }
503503
504- auto *New =
505- new VPReplicateRecipe (RepR->getUnderlyingInstr (), NewOps,
506- /* IsSingleScalar=*/ true , /* Mask=*/ nullptr , *RepR);
507- New->transferFlags (*RepR);
508- New->insertBefore (RepR);
504+ VPRecipeWithIRFlags *New;
505+ if (auto *RepR = dyn_cast<VPReplicateRecipe>(DefR)) {
506+ New =
507+ new VPReplicateRecipe (RepR->getUnderlyingInstr (), NewOps,
508+ /* IsSingleScalar=*/ true , /* Mask=*/ nullptr , *RepR);
509+ } else {
510+ New = DefR->clone ();
511+ for (const auto &[Idx, Op] : enumerate(NewOps)) {
512+ New->setOperand (Idx, Op);
513+ }
514+ }
515+ New->transferFlags (*DefR);
516+ New->insertBefore (DefR);
509517 return New;
510518}
511519
@@ -530,41 +538,46 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
530538 SmallVector<VPRecipeBase *> ToRemove;
531539 for (VPBasicBlock *VPBB : VPBBsToUnroll) {
532540 for (VPRecipeBase &R : make_early_inc_range (*VPBB)) {
533- auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
534- if (!RepR || RepR->isSingleScalar ())
541+ auto *DefR = dyn_cast<VPRecipeWithIRFlags>(&R);
542+ if (!DefR || !isa<VPInstruction, VPReplicateRecipe>(DefR))
543+ continue ;
544+ if ((isa<VPReplicateRecipe>(DefR) &&
545+ cast<VPReplicateRecipe>(DefR)->isSingleScalar ()) ||
546+ (isa<VPInstruction>(DefR) &&
547+ !cast<VPInstruction>(DefR)->doesGeneratePerAllLanes ()))
535548 continue ;
536549
537- VPBuilder Builder (RepR );
538- if (RepR ->getNumUsers () == 0 ) {
539- if (isa<StoreInst>(RepR ->getUnderlyingInstr ()) &&
540- vputils::isSingleScalar (RepR ->getOperand (1 ))) {
550+ VPBuilder Builder (DefR );
551+ if (DefR ->getNumUsers () == 0 ) {
552+ if (isa<StoreInst>(DefR ->getUnderlyingInstr ()) &&
553+ vputils::isSingleScalar (DefR ->getOperand (1 ))) {
541554 // Stores to invariant addresses need to store the last lane only.
542- cloneForLane (Plan, Builder, IdxTy, RepR , VPLane::getLastLaneForVF (VF),
555+ cloneForLane (Plan, Builder, IdxTy, DefR , VPLane::getLastLaneForVF (VF),
543556 Def2LaneDefs);
544557 } else {
545- // Create single-scalar version of RepR for all lanes.
558+ // Create single-scalar version of DefR for all lanes.
546559 for (unsigned I = 0 ; I != VF.getKnownMinValue (); ++I)
547- cloneForLane (Plan, Builder, IdxTy, RepR , VPLane (I), Def2LaneDefs);
560+ cloneForLane (Plan, Builder, IdxTy, DefR , VPLane (I), Def2LaneDefs);
548561 }
549- RepR ->eraseFromParent ();
562+ DefR ->eraseFromParent ();
550563 continue ;
551564 }
552- // / Create single-scalar version of RepR for all lanes.
565+ // / Create single-scalar version of DefR for all lanes.
553566 SmallVector<VPValue *> LaneDefs;
554567 for (unsigned I = 0 ; I != VF.getKnownMinValue (); ++I)
555568 LaneDefs.push_back (
556- cloneForLane (Plan, Builder, IdxTy, RepR , VPLane (I), Def2LaneDefs));
569+ cloneForLane (Plan, Builder, IdxTy, DefR , VPLane (I), Def2LaneDefs));
557570
558- Def2LaneDefs[RepR ] = LaneDefs;
571+ Def2LaneDefs[DefR ] = LaneDefs;
559572 // / Users that only demand the first lane can use the definition for lane
560573 // / 0.
561- RepR ->replaceUsesWithIf (LaneDefs[0 ], [RepR ](VPUser &U, unsigned ) {
562- return U.onlyFirstLaneUsed (RepR );
574+ DefR ->replaceUsesWithIf (LaneDefs[0 ], [DefR ](VPUser &U, unsigned ) {
575+ return U.onlyFirstLaneUsed (DefR );
563576 });
564577
565- // Update each build vector user that currently has RepR as its only
578+ // Update each build vector user that currently has DefR as its only
566579 // operand, to have all LaneDefs as its operands.
567- for (VPUser *U : to_vector (RepR ->users ())) {
580+ for (VPUser *U : to_vector (DefR ->users ())) {
568581 auto *VPI = dyn_cast<VPInstruction>(U);
569582 if (!VPI || (VPI->getOpcode () != VPInstruction::BuildVector &&
570583 VPI->getOpcode () != VPInstruction::BuildStructVector))
@@ -576,7 +589,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
576589 for (VPValue *LaneDef : drop_begin (LaneDefs))
577590 VPI->addOperand (LaneDef);
578591 }
579- ToRemove.push_back (RepR );
592+ ToRemove.push_back (DefR );
580593 }
581594 }
582595 for (auto *R : reverse (ToRemove))
0 commit comments