@@ -2440,24 +2440,74 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
24402440 Ctx.CostKind );
24412441}
24422442
2443+ void VPBundleRecipe::bundle (ArrayRef<VPValue *> Operands) {
2444+ assert (!BundledRecipes.empty () && " Nothing to bundle?" );
2445+
2446+ // Collect the bundled recipes in a set to tell in-bundle users from external ones.
2447+ SmallPtrSet<VPUser *, 4 > BundledUsers;
2448+ for (auto *R : BundledRecipes)
2449+ BundledUsers.insert (R);
2450+
2451+ // Recipes in the bundle, except the last one, must only be used inside the
2452+ // bundle. If there are other, external users, clone the recipe for the bundle.
2453+ for (unsigned Idx = 0 ; Idx != BundledRecipes.size () - 1 ; ++Idx) {
2454+ VPSingleDefRecipe *R = BundledRecipes[Idx];
2455+ if (all_of (R->users (), [&BundledUsers](VPUser *U) {
2456+ return BundledUsers.contains (U);
2457+ })) {
2458+ if (R->getParent ())
2459+ R->removeFromParent ();
2460+ continue ;
2461+ }
2462+ // There are users external to the bundle. Clone the recipe for use in the
2463+ // bundle and redirect only its in-bundle users to the clone.
2464+ VPSingleDefRecipe *Copy = R->clone ();
2465+ BundledRecipes[Idx] = Copy;
2466+ BundledUsers.insert (Copy);
2467+ R->replaceUsesWithIf (Copy, [&BundledUsers](VPUser &U, unsigned ) {
2468+ return BundledUsers.contains (&U);
2469+ });
2470+ }
2471+ if (BundledRecipes.back ()->getParent ())
2472+ BundledRecipes.back ()->removeFromParent ();
2473+
2474+ // Internalize all external operands to the bundled operations. To do so,
2475+ // create new temporary VPValues for all operands not defined by a recipe in
2476+ // the bundle. The original operands (or the entries of Operands, if given)
2477+ // are added as operands of the VPBundleRecipe.
2478+ for (auto *R : BundledRecipes) {
2479+ for (const auto &[Idx, Op] : enumerate(R->operands ())) {
2480+ auto *Def = Op->getDefiningRecipe ();
2481+ if (Def && BundledUsers.contains (Def))
2482+ continue ;
2483+ if (Operands.empty ())
2484+ addOperand (Op);
2485+ else
2486+ addOperand (Operands[TmpValues.size ()]);
2487+ TmpValues.push_back (new VPValue ());
2488+ R->setOperand (Idx, TmpValues.back ());
2489+ }
2490+ }
2491+ }
2492+
24432493void VPBundleRecipe::unbundle () {
2444- for (auto *Op : BundledOps )
2445- if (!Op ->getParent ())
2446- Op ->insertBefore (this );
2494+ for (auto *R : BundledRecipes )
2495+ if (!R ->getParent ())
2496+ R ->insertBefore (this );
24472497
24482498 for (const auto &[Idx, Op] : enumerate(operands ()))
24492499 TmpValues[Idx]->replaceAllUsesWith (Op);
24502500
2451- replaceAllUsesWith (getResultOp ());
2501+ replaceAllUsesWith (getResultRecipe ());
24522502
24532503 if (BundleType == BundleTypes::MulAccumulateReduction &&
2454- BundledOps .size () == 5 ) {
2504+ BundledRecipes .size () == 5 ) {
24552505 // Note that we will drop the extend after mul which transforms
24562506 // reduce.add(ext(mul(ext, ext))) to reduce.add(mul(ext, ext)).
24572507 // TODO: This transform should be done separately from bundling/unbundling.
2458- auto *Ext0 = cast<VPWidenCastRecipe>(BundledOps [0 ]);
2459- auto *Ext1 = cast<VPWidenCastRecipe>(BundledOps [1 ]);
2460- auto *Ext2 = cast<VPWidenCastRecipe>(BundledOps [3 ]);
2508+ auto *Ext0 = cast<VPWidenCastRecipe>(BundledRecipes [0 ]);
2509+ auto *Ext1 = cast<VPWidenCastRecipe>(BundledRecipes [1 ]);
2510+ auto *Ext2 = cast<VPWidenCastRecipe>(BundledRecipes [3 ]);
24612511 auto *Op0 =
24622512 new VPWidenCastRecipe (Ext0->getOpcode (), Ext0->getOperand (0 ),
24632513 Ext2->getResultType (), *Ext0, getDebugLoc ());
@@ -2469,8 +2519,8 @@ void VPBundleRecipe::unbundle() {
24692519 Ext2->getResultType (), *Ext1, getDebugLoc ());
24702520 Op1->insertBefore (Ext1);
24712521 }
2472- auto *Mul = cast<VPWidenRecipe>(BundledOps [2 ]);
2473- auto *Red = cast<VPReductionRecipe>(BundledOps [4 ]);
2522+ auto *Mul = cast<VPWidenRecipe>(BundledRecipes [2 ]);
2523+ auto *Red = cast<VPReductionRecipe>(BundledRecipes [4 ]);
24742524 Mul->setOperand (0 , Op0);
24752525 Mul->setOperand (1 , Op1);
24762526 Red->setOperand (1 , Mul);
@@ -2479,7 +2529,7 @@ void VPBundleRecipe::unbundle() {
24792529 if (Ext0 != Ext1)
24802530 Ext1->eraseFromParent ();
24812531 }
2482- BundledOps .clear ();
2532+ BundledRecipes .clear ();
24832533}
24842534
24852535InstructionCost VPBundleRecipe::computeCost (ElementCount VF,
@@ -2492,17 +2542,17 @@ InstructionCost VPBundleRecipe::computeCost(ElementCount VF,
24922542 switch (BundleType) {
24932543 case BundleTypes::ExtendedReduction: {
24942544 unsigned Opcode = RecurrenceDescriptor::getOpcode (
2495- cast<VPReductionRecipe>(BundledOps [1 ])->getRecurrenceKind ());
2545+ cast<VPReductionRecipe>(BundledRecipes [1 ])->getRecurrenceKind ());
24962546 return Ctx.TTI .getExtendedReductionCost (
24972547 Opcode,
2498- cast<VPWidenCastRecipe>(BundledOps .front ())->getOpcode () ==
2548+ cast<VPWidenCastRecipe>(BundledRecipes .front ())->getOpcode () ==
24992549 Instruction::ZExt,
25002550 RedTy, SrcVecTy, std::nullopt , Ctx.CostKind );
25012551 }
25022552 case BundleTypes::MulAccumulateReduction:
25032553 return Ctx.TTI .getMulAccReductionCost (
2504- BundledOps .size () > 2
2505- ? cast<VPWidenCastRecipe>(BundledOps .front ())->getOpcode () ==
2554+ BundledRecipes .size () > 2
2555+ ? cast<VPWidenCastRecipe>(BundledRecipes .front ())->getOpcode () ==
25062556 Instruction::ZExt
25072557 : false ,
25082558 RedTy, SrcVecTy, Ctx.CostKind );
@@ -2516,7 +2566,7 @@ void VPBundleRecipe::print(raw_ostream &O, const Twine &Indent,
25162566 O << Indent << " BUNDLE " ;
25172567 printAsOperand (O, SlotTracker);
25182568 O << " = " ;
2519- auto *Red = cast<VPReductionRecipe>(BundledOps .back ());
2569+ auto *Red = cast<VPReductionRecipe>(BundledRecipes .back ());
25202570 unsigned Opcode = RecurrenceDescriptor::getOpcode (Red->getRecurrenceKind ());
25212571
25222572 switch (BundleType) {
@@ -2527,7 +2577,7 @@ void VPBundleRecipe::print(raw_ostream &O, const Twine &Indent,
25272577 getOperand (0 )->printAsOperand (O, SlotTracker);
25282578 Red->printFlags (O);
25292579
2530- auto *Ext0 = cast<VPWidenCastRecipe>(BundledOps [0 ]);
2580+ auto *Ext0 = cast<VPWidenCastRecipe>(BundledRecipes [0 ]);
25312581 O << Instruction::getOpcodeName (Ext0->getOpcode ()) << " to "
25322582 << *Ext0->getResultType ();
25332583 if (Red->isConditional ()) {
@@ -2545,16 +2595,16 @@ void VPBundleRecipe::print(raw_ostream &O, const Twine &Indent,
25452595 RecurrenceDescriptor::getOpcode (Red->getRecurrenceKind ()))
25462596 << " (" ;
25472597 O << " mul" ;
2548- auto *Mul = cast<VPWidenRecipe>(BundledOps. size () == 2 ? BundledOps[ 0 ]
2549- : BundledOps [2 ]);
2598+ auto *Mul = cast<VPWidenRecipe>(
2599+ BundledRecipes. size () == 2 ? BundledRecipes[ 0 ] : BundledRecipes [2 ]);
25502600 Mul->printFlags (O);
2551- bool IsExtended = BundledOps .size () > 2 ;
2601+ bool IsExtended = BundledRecipes .size () > 2 ;
25522602 if (IsExtended)
25532603 O << " (" ;
25542604 getOperand (0 )->printAsOperand (O, SlotTracker);
25552605 if (IsExtended) {
25562606 auto *Ext0 = cast<VPWidenCastRecipe>(
2557- BundledOps .size () == 5 ? BundledOps [3 ] : BundledOps [0 ]);
2607+ BundledRecipes .size () == 5 ? BundledRecipes [3 ] : BundledRecipes [0 ]);
25582608 O << " " << Instruction::getOpcodeName (Ext0->getOpcode ()) << " to "
25592609 << *Ext0->getResultType () << " ), (" ;
25602610 } else {
@@ -2563,7 +2613,7 @@ void VPBundleRecipe::print(raw_ostream &O, const Twine &Indent,
25632613 getOperand (1 )->printAsOperand (O, SlotTracker);
25642614 if (IsExtended) {
25652615 auto *Ext1 = cast<VPWidenCastRecipe>(
2566- BundledOps .size () == 5 ? BundledOps [3 ] : BundledOps [1 ]);
2616+ BundledRecipes .size () == 5 ? BundledRecipes [3 ] : BundledRecipes [1 ]);
25672617 O << " " << Instruction::getOpcodeName (Ext1->getOpcode ()) << " to "
25682618 << *Ext1->getResultType () << " )" ;
25692619 }
0 commit comments