@@ -528,11 +528,10 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
528528 case VPRecipeBase::VPDerivedIVSC:
529529 case VPRecipeBase::VPEVLBasedIVPHISC:
530530 case VPRecipeBase::VPExpandSCEVSC:
531+ case VPRecipeBase::VPExpressionSC:
531532 case VPRecipeBase::VPInstructionSC:
532533 case VPRecipeBase::VPReductionEVLSC:
533534 case VPRecipeBase::VPReductionSC:
534- case VPRecipeBase::VPMulAccumulateReductionSC:
535- case VPRecipeBase::VPExtendedReductionSC:
536535 case VPRecipeBase::VPReplicateSC:
537536 case VPRecipeBase::VPScalarIVStepsSC:
538537 case VPRecipeBase::VPVectorPointerSC:
@@ -852,9 +851,7 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
852851 R->getVPDefID () == VPRecipeBase::VPReductionEVLSC ||
853852 R->getVPDefID () == VPRecipeBase::VPReplicateSC ||
854853 R->getVPDefID () == VPRecipeBase::VPVectorEndPointerSC ||
855- R->getVPDefID () == VPRecipeBase::VPVectorPointerSC ||
856- R->getVPDefID () == VPRecipeBase::VPExtendedReductionSC ||
857- R->getVPDefID () == VPRecipeBase::VPMulAccumulateReductionSC;
854+ R->getVPDefID () == VPRecipeBase::VPVectorPointerSC;
858855 }
859856
860857 static inline bool classof (const VPUser *U) {
@@ -2440,28 +2437,6 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
24402437 setUnderlyingValue (I);
24412438 }
24422439
2443- /// Constructor for VPExtendedReductionRecipe; \p SC selects the recipe ID.
2444- /// Note that the debug location is from the extend.
2445- VPReductionRecipe (const unsigned char SC, const RecurKind RdxKind,
2446- ArrayRef<VPValue *> Operands, VPValue *CondOp,
2447- bool IsOrdered, DebugLoc DL)
2448- : VPRecipeWithIRFlags(SC, Operands, DL), RdxKind(RdxKind),
2449- IsOrdered(IsOrdered), IsConditional(CondOp) { // Conditional iff CondOp is non-null.
2450- if (CondOp) // The condition is only added as an operand when present.
2451- addOperand (CondOp);
2452- }
2453-
2454- /// Constructor for VPMulAccumulateReductionRecipe. The NUW/NSW flags are from the Mul.
2455- /// NOTE(review): the public VPMulAccumulateReductionRecipe constructors pass R->getDebugLoc() (the reduction's location), not the Mul's - confirm intent.
2456- VPReductionRecipe (const unsigned char SC, const RecurKind RdxKind,
2457- ArrayRef<VPValue *> Operands, VPValue *CondOp,
2458- bool IsOrdered, WrapFlagsTy WrapFlags, DebugLoc DL)
2459- : VPRecipeWithIRFlags(SC, Operands, WrapFlags, DL), RdxKind(RdxKind),
2460- IsOrdered(IsOrdered), IsConditional(CondOp) { // Conditional iff CondOp is non-null.
2461- if (CondOp) // The condition is only added as an operand when present.
2462- addOperand (CondOp);
2463- }
2464-
24652440public:
24662441 VPReductionRecipe (RecurKind RdxKind, FastMathFlags FMFs, Instruction *I,
24672442 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
@@ -2487,9 +2462,7 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
24872462
24882463 static inline bool classof (const VPRecipeBase *R) {
24892464 return R->getVPDefID () == VPRecipeBase::VPReductionSC ||
2490- R->getVPDefID () == VPRecipeBase::VPReductionEVLSC ||
2491- R->getVPDefID () == VPRecipeBase::VPExtendedReductionSC ||
2492- R->getVPDefID () == VPRecipeBase::VPMulAccumulateReductionSC;
2465+ R->getVPDefID () == VPRecipeBase::VPReductionEVLSC;
24932466 }
24942467
24952468 static inline bool classof (const VPUser *U) {
@@ -2628,190 +2601,6 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
26282601 }
26292602};
26302603
2631- /// A recipe to represent in-loop extended reduction operations, performing a
2632- /// reduction on an extended vector operand into a scalar value, and adding the
2633- /// result to a chain. This recipe is abstract and needs to be lowered to
2634- /// concrete recipes before codegen. The operands are {ChainOp, VecOp,
2635- /// [Condition]}.
2636- class VPExtendedReductionRecipe : public VPReductionRecipe {
2637- /// Opcode of the extend for VecOp; the public constructor asserts ZExt or SExt.
2638- Instruction::CastOps ExtOp;
2639-
2640- /// The scalar type after extending.
2641- Type *ResultTy;
2642-
2643- /// For cloning: copies \p ExtRed's operands, flags and underlying value.
2644- VPExtendedReductionRecipe (VPExtendedReductionRecipe *ExtRed)
2645- : VPReductionRecipe(
2646- VPDef::VPExtendedReductionSC, ExtRed->getRecurrenceKind (),
2647- {ExtRed->getChainOp (), ExtRed->getVecOp ()}, ExtRed->getCondOp (),
2648- ExtRed->isOrdered(), ExtRed->getDebugLoc()),
2649- ExtOp(ExtRed->getExtOpcode ()), ResultTy(ExtRed->getResultType ()) {
2650- transferFlags (*ExtRed);
2651- setUnderlyingValue (ExtRed->getUnderlyingValue ());
2652- }
2653-
2654- public:
2655- VPExtendedReductionRecipe (VPReductionRecipe *R, VPWidenCastRecipe *Ext) // Combines reduction R with the extend Ext feeding it.
2656- : VPReductionRecipe(VPDef::VPExtendedReductionSC, R->getRecurrenceKind (),
2657- {R->getChainOp (), Ext->getOperand (0 )}, R->getCondOp (), // VecOp is the extend's input, not the extend itself.
2658- R->isOrdered(), Ext->getDebugLoc()),
2659- ExtOp(Ext->getOpcode ()), ResultTy(Ext->getResultType ()) {
2660- assert ((ExtOp == Instruction::CastOps::ZExt ||
2661- ExtOp == Instruction::CastOps::SExt) &&
2662- " VPExtendedReductionRecipe only supports zext and sext." );
2663-
2664- transferFlags (*Ext); // Flags come from the extend, the underlying value from R.
2665- setUnderlyingValue (R->getUnderlyingValue ());
2666- }
2667-
2668- ~VPExtendedReductionRecipe () override = default ;
2669-
2670- VPExtendedReductionRecipe *clone () override { // Delegates to the private cloning constructor.
2671- return new VPExtendedReductionRecipe (this );
2672- }
2673-
2674- VP_CLASSOF_IMPL (VPDef::VPExtendedReductionSC);
2675-
2676- void execute (VPTransformState &State) override { // Abstract recipe: reaching execute() is a bug.
2677- llvm_unreachable (" VPExtendedReductionRecipe should be transform to "
2678- " VPExtendedRecipe + VPReductionRecipe before execution." );
2679- };
2680-
2681- /// Return the cost of VPExtendedReductionRecipe.
2682- InstructionCost computeCost (ElementCount VF,
2683- VPCostContext &Ctx) const override ;
2684-
2685- #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2686- /// Print the recipe.
2687- void print (raw_ostream &O, const Twine &Indent,
2688- VPSlotTracker &SlotTracker) const override ;
2689- #endif
2690-
2691- /// Return the scalar type after extending.
2692- Type *getResultType () const { return ResultTy; }
2693-
2694- /// Return true if the extend is a ZExt.
2695- bool isZExt () const { return getExtOpcode () == Instruction::ZExt; }
2696-
2697- /// Get the opcode of the extend for VecOp.
2698- Instruction::CastOps getExtOpcode () const { return ExtOp; }
2699- };
2700-
2701- /// A recipe to represent in-loop MulAccumulateReduction operations, multiplying
2702- /// the vector operands (which may be extended), performing a reduction.add on
2703- /// the result, and adding the scalar result to a chain. This recipe is abstract
2704- /// and needs to be lowered to concrete recipes before codegen. The operands are
2705- /// {ChainOp, VecOp0, VecOp1, [Condition]}.
2706- class VPMulAccumulateReductionRecipe : public VPReductionRecipe {
2707- /// Opcode of the extend for VecOp0 and VecOp1 (read from Ext0 only); CastOpsEnd when the operands are not extended.
2708- Instruction::CastOps ExtOp;
2709-
2710- /// Non-neg flag of the extend recipe.
2711- bool IsNonNeg = false ;
2712-
2713- /// The scalar type after extending.
2714- Type *ResultTy = nullptr ;
2715-
2716- /// For cloning: copies \p MulAcc's operands, wrap flags, extend info and underlying value.
2717- VPMulAccumulateReductionRecipe (VPMulAccumulateReductionRecipe *MulAcc)
2718- : VPReductionRecipe(
2719- VPDef::VPMulAccumulateReductionSC, MulAcc->getRecurrenceKind (),
2720- {MulAcc->getChainOp (), MulAcc->getVecOp0 (), MulAcc->getVecOp1 ()},
2721- MulAcc->getCondOp (), MulAcc->isOrdered(),
2722- WrapFlagsTy(MulAcc->hasNoUnsignedWrap (), MulAcc->hasNoSignedWrap()),
2723- MulAcc->getDebugLoc()),
2724- ExtOp(MulAcc->getExtOpcode ()), IsNonNeg(MulAcc->isNonNeg ()),
2725- ResultTy(MulAcc->getResultType ()) {
2726- transferFlags (*MulAcc);
2727- setUnderlyingValue (MulAcc->getUnderlyingValue ());
2728- }
2729-
2730- public:
2731- VPMulAccumulateReductionRecipe (VPReductionRecipe *R, VPWidenRecipe *Mul, // Extended form: the vector operands are the inputs of Ext0 and Ext1.
2732- VPWidenCastRecipe *Ext0,
2733- VPWidenCastRecipe *Ext1, Type *ResultTy)
2734- : VPReductionRecipe(
2735- VPDef::VPMulAccumulateReductionSC, R->getRecurrenceKind (),
2736- {R->getChainOp (), Ext0->getOperand (0 ), Ext1->getOperand (0 )},
2737- R->getCondOp (), R->isOrdered(),
2738- WrapFlagsTy(Mul->hasNoUnsignedWrap (), Mul->hasNoSignedWrap()),
2739- R->getDebugLoc()),
2740- ExtOp(Ext0->getOpcode ()), ResultTy(ResultTy) { // Only Ext0's opcode is recorded; presumably Ext1 must match - confirm at call sites.
2741- assert (RecurrenceDescriptor::getOpcode (getRecurrenceKind ()) ==
2742- Instruction::Add &&
2743- " The reduction instruction in MulAccumulateteReductionRecipe must "
2744- " be Add" );
2745- assert ((ExtOp == Instruction::CastOps::ZExt ||
2746- ExtOp == Instruction::CastOps::SExt) &&
2747- " VPMulAccumulateReductionRecipe only supports zext and sext." );
2748- setUnderlyingValue (R->getUnderlyingValue ());
2749- // Only copy the non-negative flag if the original extend recipe carries one.
2750- if (Ext0->hasNonNegFlag ())
2751- IsNonNeg = Ext0->isNonNeg ();
2752- }
2753-
2754- VPMulAccumulateReductionRecipe (VPReductionRecipe *R, VPWidenRecipe *Mul, // Non-extended form: the vector operands are Mul's own operands.
2755- Type *ResultTy)
2756- : VPReductionRecipe(
2757- VPDef::VPMulAccumulateReductionSC, R->getRecurrenceKind (),
2758- {R->getChainOp (), Mul->getOperand (0 ), Mul->getOperand (1 )},
2759- R->getCondOp (), R->isOrdered(),
2760- WrapFlagsTy(Mul->hasNoUnsignedWrap (), Mul->hasNoSignedWrap()),
2761- R->getDebugLoc()),
2762- ExtOp(Instruction::CastOps::CastOpsEnd), ResultTy(ResultTy) { // CastOpsEnd is the "no extend" sentinel tested by isExtended().
2763- assert (RecurrenceDescriptor::getOpcode (getRecurrenceKind ()) ==
2764- Instruction::Add &&
2765- " The reduction instruction in MulAccumulateReductionRecipe must be "
2766- " Add" );
2767- setUnderlyingValue (R->getUnderlyingValue ());
2768- }
2769-
2770- ~VPMulAccumulateReductionRecipe () override = default ;
2771-
2772- VPMulAccumulateReductionRecipe *clone () override { // Delegates to the private cloning constructor.
2773- return new VPMulAccumulateReductionRecipe (this );
2774- }
2775-
2776- VP_CLASSOF_IMPL (VPDef::VPMulAccumulateReductionSC);
2777-
2778- void execute (VPTransformState &State) override { // Abstract recipe: reaching execute() is a bug.
2779- llvm_unreachable (" VPMulAccumulateReductionRecipe should transform to "
2780- " VPWidenCastRecipe + "
2781- " VPWidenRecipe + VPReductionRecipe before execution" );
2782- }
2783-
2784- /// Return the cost of VPMulAccumulateReductionRecipe.
2785- InstructionCost computeCost (ElementCount VF,
2786- VPCostContext &Ctx) const override ;
2787-
2788- #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2789- /// Print the recipe.
2790- void print (raw_ostream &O, const Twine &Indent,
2791- VPSlotTracker &SlotTracker) const override ;
2792- #endif
2793-
2794- Type *getResultType () const { return ResultTy; } // The ResultTy passed at construction.
2795-
2796- /// The first vector value to be extended and reduced (operand 1).
2797- VPValue *getVecOp0 () const { return getOperand (1 ); }
2798-
2799- /// The second vector value to be extended and reduced (operand 2).
2800- VPValue *getVecOp1 () const { return getOperand (2 ); }
2801-
2802- /// Return true if this recipe contains extended operands.
2803- bool isExtended () const { return ExtOp != Instruction::CastOps::CastOpsEnd; }
2804-
2805- /// Return the opcode of the extends for the operands.
2806- Instruction::CastOps getExtOpcode () const { return ExtOp; }
2807-
2808- /// Return true if the operands are zero-extended.
2809- bool isZExt () const { return ExtOp == Instruction::CastOps::ZExt; }
2810-
2811- /// Return true if the operand extends have the non-negative flag.
2812- bool isNonNeg () const { return IsNonNeg; }
2813- };
2814-
28152604// / VPReplicateRecipe replicates a given instruction producing multiple scalar
28162605// / copies of the original scalar type, one per lane, instead of producing a
28172606// / single copy of widened type for all lanes. If the instruction is known to be
@@ -2930,6 +2719,122 @@ class VPBranchOnMaskRecipe : public VPRecipeBase {
29302719 }
29312720};
29322721
2722+ /// A recipe to combine multiple recipes into a single 'expression' recipe,
2723+ /// which should be considered a single entity for cost-modeling and transforms.
2724+ /// The recipe needs to be 'decomposed', i.e. replaced by its individual
2725+ /// expression recipes, before execution. The individual expression recipes are
2726+ /// completely disconnected from the def-use graph of other recipes not part of
2727+ /// the expression. Def-use edges between pairs of expression recipes remain
2728+ /// intact, whereas every edge between an expression recipe and a recipe outside
2729+ /// the expression is elevated to connect the non-expression recipe with the
2730+ /// VPExpressionRecipe itself.
2731+ class VPExpressionRecipe : public VPSingleDefRecipe {
2732+ /// Recipes included in this VPExpressionRecipe; deleted by the destructor.
2733+ SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
2734+
2735+ /// Temporary VPValues used for external operands of the expression, i.e.
2736+ /// operands not defined by recipes in the expression. Deleted by the destructor.
2737+ SmallVector<VPValue *> LiveInPlaceholders;
2738+
2739+ enum class ExpressionTypes {
2740+ /// Represents an in-loop extended reduction operation, performing a
2741+ /// reduction on an extended vector operand into a scalar value, and adding
2742+ /// the result to a chain.
2743+ ExtendedReduction,
2744+ /// Represents an in-loop multiply-accumulate reduction, multiplying the
2745+ /// extended vector operands, performing a reduction.add on the result, and
2746+ /// adding the scalar result to a chain.
2747+ ExtMulAccReduction,
2748+ /// Represents an in-loop multiply-accumulate reduction, multiplying the
2749+ /// vector operands, performing a reduction.add on the result, and adding
2750+ /// the scalar result to a chain.
2751+ MulAccReduction,
2752+ };
2753+
2754+ /// Type of the expression.
2755+ ExpressionTypes ExpressionType;
2756+
2757+ /// Construct a new VPExpressionRecipe by internalizing recipes in \p
2758+ /// ExpressionRecipes. External operands (i.e. not defined by another recipe
2759+ /// in the expression) are replaced by temporary VPValues and the original
2760+ /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
2761+ /// as needed (excluding last) to ensure they are only used by other recipes
2762+ /// in the expression.
2763+ VPExpressionRecipe (ExpressionTypes ExpressionType,
2764+ ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
2765+
2766+ public:
2767+ VPExpressionRecipe (VPWidenCastRecipe *Ext, VPReductionRecipe *Red) // ExtendedReduction over {Ext, Red}.
2768+ : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
2769+ VPExpressionRecipe (VPWidenRecipe *Mul, VPReductionRecipe *Red) // MulAccReduction over {Mul, Red}.
2770+ : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
2771+ VPExpressionRecipe (VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, // ExtMulAccReduction over {Ext0, Ext1, Mul, Red}.
2772+ VPWidenRecipe *Mul, VPReductionRecipe *Red)
2773+ : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
2774+ {Ext0, Ext1, Mul, Red}) {}
2775+
2776+ ~VPExpressionRecipe () override { // Frees the bundled recipes (last to first), then the placeholders.
2777+ for (auto *R : reverse (ExpressionRecipes)) // Reverse order; presumably so users are destroyed before their definitions - confirm.
2778+ delete R;
2779+ for (VPValue *T : LiveInPlaceholders)
2780+ delete T;
2781+ }
2782+
2783+ VP_CLASSOF_IMPL (VPDef::VPExpressionSC)
2784+
2785+ VPExpressionRecipe *clone () override { // Clones every bundled recipe and wraps the clones in a new expression of the same type.
2786+ assert (!ExpressionRecipes.empty () && " empty expressions should be removed" );
2787+ SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
2788+ for (auto *R : ExpressionRecipes) // Step 1: clone each bundled recipe, keeping the original order.
2789+ NewExpressiondRecipes.push_back (R->clone ());
2790+ for (auto *New : NewExpressiondRecipes) {
2791+ for (const auto &[Idx, Old] : enumerate(ExpressionRecipes)) // Step 2: redirect uses of the old recipes to the clone at the same index.
2792+ New->replaceUsesOfWith (Old, NewExpressiondRecipes[Idx]);
2793+ // Update placeholder operands in the cloned recipe to use the external
2794+ // operands, to be internalized when the cloned expression is constructed.
2795+ for (const auto &[Placeholder, OutsideOp] :
2796+ zip (LiveInPlaceholders, operands ())) // Pairs each placeholder with this recipe's operand at the same position.
2797+ New->replaceUsesOfWith (Placeholder, OutsideOp);
2798+ }
2799+ return new VPExpressionRecipe (ExpressionType, NewExpressiondRecipes);
2800+ }
2801+
2802+ /// Return the VPValue to use to infer the result type of the recipe.
2803+ VPValue *getOperandOfResultType () const { // NOTE(review): no emptiness assert here, unlike clone(); presumably not valid after decompose() - confirm.
2804+ unsigned OpIdx = // Position counted back from the end of the operand list.
2805+ cast<VPReductionRecipe>(ExpressionRecipes.back ())->isConditional () ? 2 // Conditional reduction: second-to-last operand.
2806+ : 1 ; // Otherwise: last operand.
2807+ return getOperand (getNumOperands () - OpIdx);
2808+ }
2809+
2810+ /// Insert the recipes of the expression back into the VPlan, directly before
2811+ /// the current recipe. Leaves the expression recipe empty, which must be
2812+ /// removed before codegen.
2813+ void decompose ();
2814+
2815+ /// Method for generating code, must not be called as this recipe is abstract.
2816+ void execute (VPTransformState &State) override {
2817+ llvm_unreachable (" recipe must be removed before execute" );
2818+ }
2819+
2820+ InstructionCost computeCost (ElementCount VF,
2821+ VPCostContext &Ctx) const override ; // Defined out of line.
2822+
2823+ #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2824+ /// Print the recipe.
2825+ void print (raw_ostream &O, const Twine &Indent,
2826+ VPSlotTracker &SlotTracker) const override ;
2827+ #endif
2828+
2829+ /// Returns true if this expression contains recipes that may read from or
2830+ /// write to memory.
2831+ bool mayReadOrWriteMemory () const ;
2832+
2833+ /// Returns true if this expression contains recipes that may have side
2834+ /// effects.
2835+ bool mayHaveSideEffects () const ;
2836+ };
2837+
29332838// / VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
29342839// / control converges back from a Branch-on-Mask. The phi nodes are needed in
29352840// / order to merge values that are set under such a branch and feed their uses.
0 commit comments