@@ -525,14 +525,13 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
525525
526526 static inline bool classof (const VPRecipeBase *R) {
527527 switch (R->getVPDefID ()) {
528+ case VPRecipeBase::VPBundleSC:
528529 case VPRecipeBase::VPDerivedIVSC:
529530 case VPRecipeBase::VPEVLBasedIVPHISC:
530531 case VPRecipeBase::VPExpandSCEVSC:
531532 case VPRecipeBase::VPInstructionSC:
532533 case VPRecipeBase::VPReductionEVLSC:
533534 case VPRecipeBase::VPReductionSC:
534- case VPRecipeBase::VPMulAccumulateReductionSC:
535- case VPRecipeBase::VPExtendedReductionSC:
536535 case VPRecipeBase::VPReplicateSC:
537536 case VPRecipeBase::VPScalarIVStepsSC:
538537 case VPRecipeBase::VPVectorPointerSC:
@@ -852,9 +851,7 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
852851 R->getVPDefID () == VPRecipeBase::VPReductionEVLSC ||
853852 R->getVPDefID () == VPRecipeBase::VPReplicateSC ||
854853 R->getVPDefID () == VPRecipeBase::VPVectorEndPointerSC ||
855- R->getVPDefID () == VPRecipeBase::VPVectorPointerSC ||
856- R->getVPDefID () == VPRecipeBase::VPExtendedReductionSC ||
857- R->getVPDefID () == VPRecipeBase::VPMulAccumulateReductionSC;
854+ R->getVPDefID () == VPRecipeBase::VPVectorPointerSC;
858855 }
859856
860857 static inline bool classof (const VPUser *U) {
@@ -2431,29 +2428,6 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
24312428 }
24322429 setUnderlyingValue (I);
24332430 }
2434-
2435- // / For VPExtendedReductionRecipe.
2436- // / Note that the debug location is from the extend.
2437- VPReductionRecipe (const unsigned char SC, const RecurKind RdxKind,
2438- ArrayRef<VPValue *> Operands, VPValue *CondOp,
2439- bool IsOrdered, DebugLoc DL)
2440- : VPRecipeWithIRFlags(SC, Operands, DL), RdxKind(RdxKind),
2441- IsOrdered(IsOrdered), IsConditional(CondOp) {
2442- if (CondOp)
2443- addOperand (CondOp);
2444- }
2445-
2446- // / For VPMulAccumulateReductionRecipe.
2447- // / Note that the NUW/NSW flags and the debug location are from the Mul.
2448- VPReductionRecipe (const unsigned char SC, const RecurKind RdxKind,
2449- ArrayRef<VPValue *> Operands, VPValue *CondOp,
2450- bool IsOrdered, WrapFlagsTy WrapFlags, DebugLoc DL)
2451- : VPRecipeWithIRFlags(SC, Operands, WrapFlags, DL), RdxKind(RdxKind),
2452- IsOrdered(IsOrdered), IsConditional(CondOp) {
2453- if (CondOp)
2454- addOperand (CondOp);
2455- }
2456-
24572431public:
24582432 VPReductionRecipe (RecurKind RdxKind, FastMathFlags FMFs, Instruction *I,
24592433 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
@@ -2479,9 +2453,7 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
24792453
24802454 static inline bool classof (const VPRecipeBase *R) {
24812455 return R->getVPDefID () == VPRecipeBase::VPReductionSC ||
2482- R->getVPDefID () == VPRecipeBase::VPReductionEVLSC ||
2483- R->getVPDefID () == VPRecipeBase::VPExtendedReductionSC ||
2484- R->getVPDefID () == VPRecipeBase::VPMulAccumulateReductionSC;
2456+ R->getVPDefID () == VPRecipeBase::VPReductionEVLSC;
24852457 }
24862458
24872459 static inline bool classof (const VPUser *U) {
@@ -2620,190 +2592,6 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
26202592 }
26212593};
26222594
2623- // / A recipe to represent inloop extended reduction operations, performing a
2624- // / reduction on a extended vector operand into a scalar value, and adding the
2625- // / result to a chain. This recipe is abstract and needs to be lowered to
2626- // / concrete recipes before codegen. The operands are {ChainOp, VecOp,
2627- // / [Condition]}.
2628- class VPExtendedReductionRecipe : public VPReductionRecipe {
2629- // / Opcode of the extend for VecOp.
2630- Instruction::CastOps ExtOp;
2631-
2632- // / The scalar type after extending.
2633- Type *ResultTy;
2634-
2635- // / For cloning VPExtendedReductionRecipe.
2636- VPExtendedReductionRecipe (VPExtendedReductionRecipe *ExtRed)
2637- : VPReductionRecipe(
2638- VPDef::VPExtendedReductionSC, ExtRed->getRecurrenceKind (),
2639- {ExtRed->getChainOp (), ExtRed->getVecOp ()}, ExtRed->getCondOp (),
2640- ExtRed->isOrdered(), ExtRed->getDebugLoc()),
2641- ExtOp(ExtRed->getExtOpcode ()), ResultTy(ExtRed->getResultType ()) {
2642- transferFlags (*ExtRed);
2643- setUnderlyingValue (ExtRed->getUnderlyingValue ());
2644- }
2645-
2646- public:
2647- VPExtendedReductionRecipe (VPReductionRecipe *R, VPWidenCastRecipe *Ext)
2648- : VPReductionRecipe(VPDef::VPExtendedReductionSC, R->getRecurrenceKind (),
2649- {R->getChainOp (), Ext->getOperand (0 )}, R->getCondOp (),
2650- R->isOrdered(), Ext->getDebugLoc()),
2651- ExtOp(Ext->getOpcode ()), ResultTy(Ext->getResultType ()) {
2652- assert ((ExtOp == Instruction::CastOps::ZExt ||
2653- ExtOp == Instruction::CastOps::SExt) &&
2654- " VPExtendedReductionRecipe only supports zext and sext." );
2655-
2656- transferFlags (*Ext);
2657- setUnderlyingValue (R->getUnderlyingValue ());
2658- }
2659-
2660- ~VPExtendedReductionRecipe () override = default ;
2661-
2662- VPExtendedReductionRecipe *clone () override {
2663- return new VPExtendedReductionRecipe (this );
2664- }
2665-
2666- VP_CLASSOF_IMPL (VPDef::VPExtendedReductionSC);
2667-
2668- void execute (VPTransformState &State) override {
2669- llvm_unreachable (" VPExtendedReductionRecipe should be transform to "
2670- " VPExtendedRecipe + VPReductionRecipe before execution." );
2671- };
2672-
2673- // / Return the cost of VPExtendedReductionRecipe.
2674- InstructionCost computeCost (ElementCount VF,
2675- VPCostContext &Ctx) const override ;
2676-
2677- #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2678- // / Print the recipe.
2679- void print (raw_ostream &O, const Twine &Indent,
2680- VPSlotTracker &SlotTracker) const override ;
2681- #endif
2682-
2683- // / The scalar type after extending.
2684- Type *getResultType () const { return ResultTy; }
2685-
2686- // / Is the extend ZExt?
2687- bool isZExt () const { return getExtOpcode () == Instruction::ZExt; }
2688-
2689- // / Get the opcode of the extend for VecOp.
2690- Instruction::CastOps getExtOpcode () const { return ExtOp; }
2691- };
2692-
2693- // / A recipe to represent inloop MulAccumulateReduction operations, multiplying
2694- // / the vector operands (which may be extended), performing a reduction.add on
2695- // / the result, and adding the scalar result to a chain. This recipe is abstract
2696- // / and needs to be lowered to concrete recipes before codegen. The operands are
2697- // / {ChainOp, VecOp1, VecOp2, [Condition]}.
2698- class VPMulAccumulateReductionRecipe : public VPReductionRecipe {
2699- // / Opcode of the extend for VecOp1 and VecOp2.
2700- Instruction::CastOps ExtOp;
2701-
2702- // / Non-neg flag of the extend recipe.
2703- bool IsNonNeg = false ;
2704-
2705- // / The scalar type after extending.
2706- Type *ResultTy = nullptr ;
2707-
2708- // / For cloning VPMulAccumulateReductionRecipe.
2709- VPMulAccumulateReductionRecipe (VPMulAccumulateReductionRecipe *MulAcc)
2710- : VPReductionRecipe(
2711- VPDef::VPMulAccumulateReductionSC, MulAcc->getRecurrenceKind (),
2712- {MulAcc->getChainOp (), MulAcc->getVecOp0 (), MulAcc->getVecOp1 ()},
2713- MulAcc->getCondOp (), MulAcc->isOrdered(),
2714- WrapFlagsTy(MulAcc->hasNoUnsignedWrap (), MulAcc->hasNoSignedWrap()),
2715- MulAcc->getDebugLoc()),
2716- ExtOp(MulAcc->getExtOpcode ()), IsNonNeg(MulAcc->isNonNeg ()),
2717- ResultTy(MulAcc->getResultType ()) {
2718- transferFlags (*MulAcc);
2719- setUnderlyingValue (MulAcc->getUnderlyingValue ());
2720- }
2721-
2722- public:
2723- VPMulAccumulateReductionRecipe (VPReductionRecipe *R, VPWidenRecipe *Mul,
2724- VPWidenCastRecipe *Ext0,
2725- VPWidenCastRecipe *Ext1, Type *ResultTy)
2726- : VPReductionRecipe(
2727- VPDef::VPMulAccumulateReductionSC, R->getRecurrenceKind (),
2728- {R->getChainOp (), Ext0->getOperand (0 ), Ext1->getOperand (0 )},
2729- R->getCondOp (), R->isOrdered(),
2730- WrapFlagsTy(Mul->hasNoUnsignedWrap (), Mul->hasNoSignedWrap()),
2731- R->getDebugLoc()),
2732- ExtOp(Ext0->getOpcode ()), ResultTy(ResultTy) {
2733- assert (RecurrenceDescriptor::getOpcode (getRecurrenceKind ()) ==
2734- Instruction::Add &&
2735- " The reduction instruction in MulAccumulateteReductionRecipe must "
2736- " be Add" );
2737- assert ((ExtOp == Instruction::CastOps::ZExt ||
2738- ExtOp == Instruction::CastOps::SExt) &&
2739- " VPMulAccumulateReductionRecipe only supports zext and sext." );
2740- setUnderlyingValue (R->getUnderlyingValue ());
2741- // Only set the non-negative flag if the original recipe contains.
2742- if (Ext0->hasNonNegFlag ())
2743- IsNonNeg = Ext0->isNonNeg ();
2744- }
2745-
2746- VPMulAccumulateReductionRecipe (VPReductionRecipe *R, VPWidenRecipe *Mul,
2747- Type *ResultTy)
2748- : VPReductionRecipe(
2749- VPDef::VPMulAccumulateReductionSC, R->getRecurrenceKind (),
2750- {R->getChainOp (), Mul->getOperand (0 ), Mul->getOperand (1 )},
2751- R->getCondOp (), R->isOrdered(),
2752- WrapFlagsTy(Mul->hasNoUnsignedWrap (), Mul->hasNoSignedWrap()),
2753- R->getDebugLoc()),
2754- ExtOp(Instruction::CastOps::CastOpsEnd), ResultTy(ResultTy) {
2755- assert (RecurrenceDescriptor::getOpcode (getRecurrenceKind ()) ==
2756- Instruction::Add &&
2757- " The reduction instruction in MulAccumulateReductionRecipe must be "
2758- " Add" );
2759- setUnderlyingValue (R->getUnderlyingValue ());
2760- }
2761-
2762- ~VPMulAccumulateReductionRecipe () override = default ;
2763-
2764- VPMulAccumulateReductionRecipe *clone () override {
2765- return new VPMulAccumulateReductionRecipe (this );
2766- }
2767-
2768- VP_CLASSOF_IMPL (VPDef::VPMulAccumulateReductionSC);
2769-
2770- void execute (VPTransformState &State) override {
2771- llvm_unreachable (" VPMulAccumulateReductionRecipe should transform to "
2772- " VPWidenCastRecipe + "
2773- " VPWidenRecipe + VPReductionRecipe before execution" );
2774- }
2775-
2776- // / Return the cost of VPMulAccumulateReductionRecipe.
2777- InstructionCost computeCost (ElementCount VF,
2778- VPCostContext &Ctx) const override ;
2779-
2780- #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2781- // / Print the recipe.
2782- void print (raw_ostream &O, const Twine &Indent,
2783- VPSlotTracker &SlotTracker) const override ;
2784- #endif
2785-
2786- Type *getResultType () const { return ResultTy; }
2787-
2788- // / The first vector value to be extended and reduced.
2789- VPValue *getVecOp0 () const { return getOperand (1 ); }
2790-
2791- // / The second vector value to be extended and reduced.
2792- VPValue *getVecOp1 () const { return getOperand (2 ); }
2793-
2794- // / Return true if this recipe contains extended operands.
2795- bool isExtended () const { return ExtOp != Instruction::CastOps::CastOpsEnd; }
2796-
2797- // / Return the opcode of the extends for the operands.
2798- Instruction::CastOps getExtOpcode () const { return ExtOp; }
2799-
2800- // / Return if the operands are zero-extended.
2801- bool isZExt () const { return ExtOp == Instruction::CastOps::ZExt; }
2802-
2803- // / Return true if the operand extends have the non-negative flag.
2804- bool isNonNeg () const { return IsNonNeg; }
2805- };
2806-
28072595// / VPReplicateRecipe replicates a given instruction producing multiple scalar
28082596// / copies of the original scalar type, one per lane, instead of producing a
28092597// / single copy of widened type for all lanes. If the instruction is known to be
@@ -2922,6 +2710,123 @@ class VPBranchOnMaskRecipe : public VPRecipeBase {
29222710 }
29232711};
29242712
2713+ // / A recipe to combine multiple recipes into a 'bundle' recipe, which should be
2714+ // / considered a single entity for cost-modeling and transforms. The recipe
2715+ // / needs to be 'unbundled', i.e. replaced by its individual recipes before
2716+ // / execution.
2717+ class VPBundleRecipe : public VPSingleDefRecipe {
2718+ enum class BundleTypes {
2719+ ExtendedReduction,
2720+ MulAccumulateReduction,
2721+ };
2722+
2723+ // / Recipes bundled together in this VPBundleRecipe. The destructor deletes them.
2724+ SmallVector<VPSingleDefRecipe *> BundledOps;
2725+
2726+ // / Temporary VPValues used for external operands of the bundle, i.e. operands
2727+ // / not defined by recipes in the bundle. The destructor deletes them.
2728+ SmallVector<VPValue *> TmpValues;
2729+
2730+ // / Type of the bundle.
2731+ BundleTypes BundleType;
2732+
2733+ VPBundleRecipe (BundleTypes BundleType, ArrayRef<VPSingleDefRecipe *> ToBundle)
2734+ : VPSingleDefRecipe(VPDef::VPBundleSC, {}, {}), BundledOps(ToBundle),
2735+ BundleType (BundleType) {
2736+ // Collect the recipes to bundle so in-bundle users can be told apart from external ones.
2737+ SmallPtrSet<VPUser *, 4 > BundledUsers;
2738+ for (auto *R : ToBundle)
2739+ BundledUsers.insert (R);
2740+
2741+ // Recipes in the bundle, except the last one, must only be used inside the
2742+ // bundle. If there are external users, clone the recipe for use in the bundle.
2743+ for (const auto &[Idx, R] : enumerate(drop_end (ToBundle))) {
2744+ if (all_of (R->users (), [&BundledUsers](VPUser *U) {
2745+ return BundledUsers.contains (U);
2746+ })) {
2747+ if (R->getParent ())
2748+ R->removeFromParent ();
2749+ continue ;
2750+ }
2751+ // There are users external to the bundle. Clone the recipe for use in the
2752+ // bundle and update all its in-bundle users.
2753+ this ->BundledOps [Idx] = R->clone ();
2754+ BundledUsers.insert (this ->BundledOps [Idx]);
2755+ R->replaceUsesWithIf (this ->BundledOps [Idx],
2756+ [&BundledUsers](VPUser &U, unsigned ) {
2757+ return BundledUsers.contains (&U);
2758+ });
2759+ }
2760+ BundledOps.back ()->removeFromParent (); // the last recipe is detached without a getParent() check
2761+
2762+ // Internalize all external operands to the bundled operations. To do so,
2763+ // create new temporary VPValues for all operands not defined by a recipe in
2764+ // the bundle. The original operands are added as operands of the
2765+ // VPBundleRecipe.
2766+ for (auto *R : this ->BundledOps ) {
2767+ for (const auto &[Idx, Op] : enumerate(R->operands ())) {
2768+ auto *Def = Op->getDefiningRecipe ();
2769+ if (Def && BundledUsers.contains (Def))
2770+ continue ;
2771+ addOperand (Op);
2772+ TmpValues.push_back (new VPValue ());
2773+ R->setOperand (Idx, TmpValues.back ());
2774+ }
2775+ }
2776+ }
2777+
2778+ public:
2779+ VPBundleRecipe (VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
2780+ : VPBundleRecipe(BundleTypes::ExtendedReduction, {Ext, Red}) {}
2781+ VPBundleRecipe (VPWidenRecipe *Mul, VPReductionRecipe *Red)
2782+ : VPBundleRecipe(BundleTypes::MulAccumulateReduction, {Mul, Red}) {}
2783+ VPBundleRecipe (VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
2784+ VPWidenRecipe *Mul, VPReductionRecipe *Red)
2785+ : VPBundleRecipe(BundleTypes::MulAccumulateReduction,
2786+ {Ext0, Ext1, Mul, Red}) {}
2787+ VPBundleRecipe (VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
2788+ VPWidenRecipe *Mul, VPWidenCastRecipe *Ext2,
2789+ VPReductionRecipe *Red)
2790+ : VPBundleRecipe(BundleTypes::MulAccumulateReduction,
2791+ {Ext0, Ext1, Mul, Ext2, Red}) {}
2792+
2793+ ~VPBundleRecipe () override {
2794+ SmallPtrSet<VPRecipeBase *, 4 > Seen;
2795+ for (auto *R : reverse (BundledOps))
2796+ if (Seen.insert (R).second ) // a recipe listed more than once is deleted only once
2797+ delete R;
2798+ for (VPValue *T : TmpValues)
2799+ delete T;
2800+ }
2801+
2802+ VP_CLASSOF_IMPL (VPDef::VPBundleSC)
2803+
2804+ VPBundleRecipe *clone () override {
2805+ return new VPBundleRecipe (BundleType, BundledOps); // NOTE(review): hands this bundle's own recipe pointers to the new bundle rather than copies; both destructors would then delete the same recipes -- confirm clone() is unreachable or clone the bundled recipes first.
2806+ }
2807+
2808+ // / Return the VPSingleDefRecipe producing the final result of the bundled
2809+ // / recipe.
2810+ VPSingleDefRecipe *getResultOp () const { return BundledOps.back (); }
2811+
2812+ void unbundle (); // defined elsewhere; presumably performs the 'unbundling' described in the class comment -- confirm
2813+
2814+ // / Must never be reached: the recipe has to be removed before execution, so
2815+ // / this body only reports the violation via llvm_unreachable.
2816+ void execute (VPTransformState &State) override {
2817+ llvm_unreachable (" recipe must be removed before execute" );
2818+ }
2819+
2820+ InstructionCost computeCost (ElementCount VF,
2821+ VPCostContext &Ctx) const override ;
2822+
2823+ #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2824+ // / Print the recipe.
2825+ void print (raw_ostream &O, const Twine &Indent,
2826+ VPSlotTracker &SlotTracker) const override ;
2827+ #endif
2828+ };
2829+
29252830// / VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
29262831// / control converges back from a Branch-on-Mask. The phi nodes are needed in
29272832// / order to merge values that are set under such a branch and feed their uses.
0 commit comments