@@ -463,15 +463,16 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
463
463
VPlanTransforms::removeDeadRecipes (Plan);
464
464
}
465
465
466
- // / Create a single-scalar clone of \p RepR for lane \p Lane. Use \p
467
- // / Def2LaneDefs to look up scalar definitions for operands of \RepR.
468
- static VPReplicateRecipe *
466
+ // / Create a single-scalar clone of \p DefR (must be a VPReplicateRecipe or
467
+ // / VPInstruction) for lane \p Lane. Use \p Def2LaneDefs to look up scalar
468
+ // / definitions for operands of \p DefR.
469
+ static VPRecipeWithIRFlags *
469
470
cloneForLane (VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
470
- VPReplicateRecipe *RepR , VPLane Lane,
471
+ VPRecipeWithIRFlags *DefR , VPLane Lane,
471
472
const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
472
473
// Collect the operands at Lane, creating extracts as needed.
473
474
SmallVector<VPValue *> NewOps;
474
- for (VPValue *Op : RepR ->operands ()) {
475
+ for (VPValue *Op : DefR ->operands ()) {
475
476
// If Op is a definition that has been unrolled, directly use the clone for
476
477
// the corresponding lane.
477
478
auto LaneDefs = Def2LaneDefs.find (Op);
@@ -501,11 +502,24 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
501
502
NewOps.push_back (Ext);
502
503
}
503
504
504
- auto *New =
505
- new VPReplicateRecipe (RepR->getUnderlyingInstr (), NewOps,
506
- /* IsSingleScalar=*/ true , /* Mask=*/ nullptr , *RepR);
507
- New->transferFlags (*RepR);
508
- New->insertBefore (RepR);
505
+ VPRecipeWithIRFlags *New;
506
+ if (auto *RepR = dyn_cast<VPReplicateRecipe>(DefR)) {
507
+ // TODO: have cloning of replicate recipes also provide the desired result
508
+ // coupled with setting its operands to NewOps (deriving IsSingleScalar and
509
+ // Mask from the operands?)
510
+ New =
511
+ new VPReplicateRecipe (RepR->getUnderlyingInstr (), NewOps,
512
+ /* IsSingleScalar=*/ true , /* Mask=*/ nullptr , *RepR);
513
+ } else {
514
+ assert (isa<VPInstruction>(DefR) &&
515
+ " DefR must be a VPReplicateRecipe or VPInstruction" );
516
+ New = DefR->clone ();
517
+ for (const auto &[Idx, Op] : enumerate(NewOps)) {
518
+ New->setOperand (Idx, Op);
519
+ }
520
+ }
521
+ New->transferFlags (*DefR);
522
+ New->insertBefore (DefR);
509
523
return New;
510
524
}
511
525
@@ -530,34 +544,38 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
530
544
SmallVector<VPRecipeBase *> ToRemove;
531
545
for (VPBasicBlock *VPBB : VPBBsToUnroll) {
532
546
for (VPRecipeBase &R : make_early_inc_range (*VPBB)) {
533
- auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
534
- if (!RepR || RepR->isSingleScalar ())
547
+ if (!isa<VPInstruction, VPReplicateRecipe>(&R) ||
548
+ (isa<VPReplicateRecipe>(&R) &&
549
+ cast<VPReplicateRecipe>(&R)->isSingleScalar ()) ||
550
+ (isa<VPInstruction>(&R) &&
551
+ !cast<VPInstruction>(&R)->doesGeneratePerAllLanes ()))
535
552
continue ;
536
553
537
- VPBuilder Builder (RepR);
538
- if (RepR->getNumUsers () == 0 ) {
539
- // Create single-scalar version of RepR for all lanes.
554
+ auto *DefR = cast<VPRecipeWithIRFlags>(&R);
555
+ VPBuilder Builder (DefR);
556
+ if (DefR->getNumUsers () == 0 ) {
557
+ // Create single-scalar version of DefR for all lanes.
540
558
for (unsigned I = 0 ; I != VF.getKnownMinValue (); ++I)
541
- cloneForLane (Plan, Builder, IdxTy, RepR , VPLane (I), Def2LaneDefs);
542
- RepR ->eraseFromParent ();
559
+ cloneForLane (Plan, Builder, IdxTy, DefR , VPLane (I), Def2LaneDefs);
560
+ DefR ->eraseFromParent ();
543
561
continue ;
544
562
}
545
- // / Create single-scalar version of RepR for all lanes.
563
+ // / Create single-scalar version of DefR for all lanes.
546
564
SmallVector<VPValue *> LaneDefs;
547
565
for (unsigned I = 0 ; I != VF.getKnownMinValue (); ++I)
548
566
LaneDefs.push_back (
549
- cloneForLane (Plan, Builder, IdxTy, RepR , VPLane (I), Def2LaneDefs));
567
+ cloneForLane (Plan, Builder, IdxTy, DefR , VPLane (I), Def2LaneDefs));
550
568
551
- Def2LaneDefs[RepR ] = LaneDefs;
569
+ Def2LaneDefs[DefR ] = LaneDefs;
552
570
// / Users that only demand the first lane can use the definition for lane
553
571
// / 0.
554
- RepR ->replaceUsesWithIf (LaneDefs[0 ], [RepR ](VPUser &U, unsigned ) {
555
- return U.onlyFirstLaneUsed (RepR );
572
+ DefR ->replaceUsesWithIf (LaneDefs[0 ], [DefR ](VPUser &U, unsigned ) {
573
+ return U.onlyFirstLaneUsed (DefR );
556
574
});
557
575
558
- // Update each build vector user that currently has RepR as its only
576
+ // Update each build vector user that currently has DefR as its only
559
577
// operand, to have all LaneDefs as its operands.
560
- for (VPUser *U : to_vector (RepR ->users ())) {
578
+ for (VPUser *U : to_vector (DefR ->users ())) {
561
579
auto *VPI = dyn_cast<VPInstruction>(U);
562
580
if (!VPI || (VPI->getOpcode () != VPInstruction::BuildVector &&
563
581
VPI->getOpcode () != VPInstruction::BuildStructVector))
@@ -569,7 +587,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
569
587
for (VPValue *LaneDef : drop_begin (LaneDefs))
570
588
VPI->addOperand (LaneDef);
571
589
}
572
- ToRemove.push_back (RepR );
590
+ ToRemove.push_back (DefR );
573
591
}
574
592
}
575
593
for (auto *R : reverse (ToRemove))
0 commit comments