@@ -2528,90 +2528,102 @@ void VPlanTransforms::addActiveLaneMask(
25282528 HeaderMask->eraseFromParent ();
25292529}
25302530
// Pattern matcher that accepts either the mask In itself, or a
// (logical-and In, X) combination of it. On a successful match Out is written:
// nullptr when the value is exactly In, otherwise the remaining operand X.
2531+ template <typename Op0_t, typename Op1_t> struct RemoveMask_match {
2532+ Op0_t In; // Mask to look for; stored by copy.
2533+ Op1_t &Out; // Caller-owned slot that receives the remaining mask.
2534+
2535+ RemoveMask_match (const Op0_t &In, Op1_t &Out) : In(In), Out(Out) {}
2536+
2537+ template <typename OpTy> bool match (OpTy *V) const {
2538+ if (m_Specific (In).match (V)) { // V is exactly In: nothing remains.
2539+ Out = nullptr ;
2540+ return true ;
2541+ }
2542+ if (m_LogicalAnd (m_Specific (In), m_VPValue (Out)).match (V)) // Out captures the second operand.
2543+ return true ;
2544+ return false ;
2545+ }
2546+ };
2547+
2548+ // / Match a specific mask \p In, or a combination of it (logical-and In, Out).
2549+ // / On a match, \p Out is set to the remaining part of the mask, or to nullptr when the mask is exactly \p In.
2550+ template <typename Op0_t, typename Op1_t>
2551+ static inline RemoveMask_match<Op0_t, Op1_t> m_RemoveMask (const Op0_t &In,
2552+ Op1_t &Out) {
2553+ return RemoveMask_match<Op0_t, Op1_t>(In, Out); // Only builds the matcher; matching happens in RemoveMask_match::match.
2554+ }
2555+
25312556// / Try to optimize a \p CurRecipe masked by \p HeaderMask to a corresponding
25322557// / EVL-based recipe without the header mask. Returns nullptr if no EVL-based
25332558// / recipe could be created.
25342559// / \p HeaderMask Header mask to strip from the mask of \p CurRecipe.
25352560// / \p CurRecipe Recipe to be transformed.
25362561// / \p TypeInfo VPlan-based type analysis.
2537- // / \p AllOneMask The vector mask parameter of vector-predication intrinsics.
25382562// / \p EVL The explicit vector length parameter of vector-predication
25392563// / intrinsics.
25402564static VPRecipeBase *optimizeMaskToEVL (VPValue *HeaderMask,
25412565 VPRecipeBase &CurRecipe,
2542- VPTypeAnalysis &TypeInfo,
2543- VPValue &AllOneMask, VPValue &EVL) {
2544- // FIXME: Don't transform recipes to EVL recipes if they're not masked by the
2545- // header mask.
2546- auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * {
2547- assert (OrigMask && " Unmasked recipe when folding tail" );
2548- // HeaderMask will be handled using EVL.
2549- VPValue *Mask;
2550- if (match (OrigMask, m_LogicalAnd (m_Specific (HeaderMask), m_VPValue (Mask))))
2551- return Mask;
2552- return HeaderMask == OrigMask ? nullptr : OrigMask;
2553- };
2566+ VPTypeAnalysis &TypeInfo, VPValue &EVL) {
2567+ VPlan *Plan = CurRecipe.getParent ()->getPlan ();
2568+ VPValue *Addr, *Mask, *EndPtr; // Outputs bound by the matchers below.
25542569
25552570 // / Adjust any end pointers so that they point to the end of EVL lanes not VF.
2556- auto GetNewAddr = [&CurRecipe, &EVL](VPValue *Addr) -> VPValue * {
2557- auto *EndPtr = dyn_cast<VPVectorEndPointerRecipe>(Addr);
2558- if (!EndPtr)
2559- return Addr;
2560- assert (EndPtr->getOperand (1 ) == &EndPtr->getParent ()->getPlan ()->getVF () &&
2561- " VPVectorEndPointerRecipe with non-VF VF operand?" );
2562- assert (
2563- all_of (EndPtr->users (),
2564- [](VPUser *U) {
2565- return cast<VPWidenMemoryRecipe>(U)->isReverse ();
2566- }) &&
2567- " VPVectorEndPointRecipe not used by reversed widened memory recipe?" );
2568- VPVectorEndPointerRecipe *EVLAddr = EndPtr->clone ();
2569- EVLAddr->insertBefore (&CurRecipe);
2570- EVLAddr->setOperand (1 , &EVL);
2571- return EVLAddr;
      // The callers below only pass values that already matched m_VecEndPtr
      // with the plan's VF as second operand, so the cast<> is expected to hold.
2571+ auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) {
2572+ auto *EVLEndPtr = cast<VPVectorEndPointerRecipe>(EndPtr)->clone (); // Work on a copy; the original end pointer is left untouched.
2573+ EVLEndPtr->insertBefore (&CurRecipe);
2574+ EVLEndPtr->setOperand (1 , &EVL); // Operand 1 (matched as VF by the callers) becomes EVL.
2575+ return EVLEndPtr;
25722576 };
25732577
2574- return TypeSwitch<VPRecipeBase *, VPRecipeBase *>(&CurRecipe)
2575- .Case <VPWidenLoadRecipe>([&](VPWidenLoadRecipe *L) {
2576- VPValue *NewMask = GetNewMask (L->getMask ());
2577- VPValue *NewAddr = GetNewAddr (L->getAddr ());
2578- return new VPWidenLoadEVLRecipe (*L, NewAddr, EVL, NewMask);
2579- })
2580- .Case <VPWidenStoreRecipe>([&](VPWidenStoreRecipe *S) {
2581- VPValue *NewMask = GetNewMask (S->getMask ());
2582- VPValue *NewAddr = GetNewAddr (S->getAddr ());
2583- return new VPWidenStoreEVLRecipe (*S, NewAddr, EVL, NewMask);
2584- })
2585- .Case <VPInterleaveRecipe>([&](VPInterleaveRecipe *IR) {
2586- VPValue *NewMask = GetNewMask (IR->getMask ());
2587- return new VPInterleaveEVLRecipe (*IR, EVL, NewMask);
2588- })
2589- .Case <VPReductionRecipe>([&](VPReductionRecipe *Red) {
2590- VPValue *NewMask = GetNewMask (Red->getCondOp ());
2591- return new VPReductionEVLRecipe (*Red, EVL, NewMask);
2592- })
2593- .Case <VPInstruction>([&](VPInstruction *VPI) -> VPRecipeBase * {
2594- VPValue *LHS, *RHS;
2595- // Transform select with a header mask condition
2596- // select(header_mask, LHS, RHS)
2597- // into vector predication merge.
2598- // vp.merge(all-true, LHS, RHS, EVL)
2599- if (!match (VPI, m_Select (m_Specific (HeaderMask), m_VPValue (LHS),
2600- m_VPValue (RHS))))
2601- return nullptr ;
2602- // Use all true as the condition because this transformation is
2603- // limited to selects whose condition is a header mask.
2604- return new VPWidenIntrinsicRecipe (
2605- Intrinsic::vp_merge, {&AllOneMask, LHS, RHS, &EVL},
2606- TypeInfo.inferScalarType (LHS), VPI->getDebugLoc ());
2607- })
2608- .Default ([&](VPRecipeBase *R) { return nullptr ; });
      // Non-reverse masked load whose mask contains the header mask: keep the
      // address and pass on only the remaining mask (Mask is nullptr when the
      // mask was exactly HeaderMask).
2578+ if (match (&CurRecipe,
2579+ m_MaskedLoad (m_VPValue (Addr), m_RemoveMask (HeaderMask, Mask))) &&
2580+ !cast<VPWidenLoadRecipe>(CurRecipe).isReverse ())
2581+ return new VPWidenLoadEVLRecipe (cast<VPWidenLoadRecipe>(CurRecipe), Addr,
2582+ EVL, Mask);
2583+
      // Reverse masked load: only handled when the address matches m_VecEndPtr
      // with the plan's VF; the new recipe uses a copy of that end pointer with
      // EVL in place of VF.
2584+ if (match (&CurRecipe,
2585+ m_MaskedLoad (m_VPValue (EndPtr), m_RemoveMask (HeaderMask, Mask))) &&
2586+ match (EndPtr, m_VecEndPtr (m_VPValue (Addr), m_Specific (&Plan->getVF ()))) &&
2587+ cast<VPWidenLoadRecipe>(CurRecipe).isReverse ())
2588+ return new VPWidenLoadEVLRecipe (cast<VPWidenLoadRecipe>(CurRecipe),
2589+ AdjustEndPtr (EndPtr), EVL, Mask);
2590+
      // Non-reverse masked store; the middle operand (presumably the stored
      // value) is matched but not captured.
2591+ if (match (&CurRecipe, m_MaskedStore (m_VPValue (Addr), m_VPValue (),
2592+ m_RemoveMask (HeaderMask, Mask))) &&
2593+ !cast<VPWidenStoreRecipe>(CurRecipe).isReverse ())
2594+ return new VPWidenStoreEVLRecipe (cast<VPWidenStoreRecipe>(CurRecipe), Addr,
2595+ EVL, Mask);
2596+
      // Reverse masked store: same end-pointer handling as the reverse load.
2597+ if (match (&CurRecipe, m_MaskedStore (m_VPValue (EndPtr), m_VPValue (),
2598+ m_RemoveMask (HeaderMask, Mask))) &&
2599+ match (EndPtr, m_VecEndPtr (m_VPValue (Addr), m_Specific (&Plan->getVF ()))) &&
2600+ cast<VPWidenStoreRecipe>(CurRecipe).isReverse ())
2601+ return new VPWidenStoreEVLRecipe (cast<VPWidenStoreRecipe>(CurRecipe),
2602+ AdjustEndPtr (EndPtr), EVL, Mask);
2603+
      // Reductions are converted only when they are conditional and the
      // condition contains the header mask.
2604+ if (auto *Rdx = dyn_cast<VPReductionRecipe>(&CurRecipe))
2605+ if (Rdx->isConditional () &&
2606+ match (Rdx->getCondOp (), m_RemoveMask (HeaderMask, Mask)))
2607+ return new VPReductionEVLRecipe (*Rdx, EVL, Mask);
2608+
      // Interleave groups are converted only when they carry a mask that
      // contains the header mask.
2609+ if (auto *Interleave = dyn_cast<VPInterleaveRecipe>(&CurRecipe))
2610+ if (Interleave->getMask () &&
2611+ match (Interleave->getMask (), m_RemoveMask (HeaderMask, Mask)))
2612+ return new VPInterleaveEVLRecipe (*Interleave, EVL, Mask);
2613+
      // select(HeaderMask, LHS, RHS) -> vp.merge(true, LHS, RHS, EVL).
      // Only a condition that is exactly the header mask is matched, so the
      // all-true value can be used as the vp.merge mask.
2614+ VPValue *LHS, *RHS;
2615+ if (match (&CurRecipe,
2616+ m_Select (m_Specific (HeaderMask), m_VPValue (LHS), m_VPValue (RHS))))
2617+ return new VPWidenIntrinsicRecipe (
2618+ Intrinsic::vp_merge, {Plan->getTrue (), LHS, RHS, &EVL},
2619+ TypeInfo.inferScalarType (LHS), CurRecipe.getDebugLoc ());
2620+
      // None of the patterns above applied; no EVL recipe is created.
2621+ return nullptr ;
26092622}
26102623
26112624// / Replace recipes with their EVL variants.
26122625static void transformRecipestoEVLRecipes (VPlan &Plan, VPValue &EVL) {
26132626 VPTypeAnalysis TypeInfo (Plan);
2614- VPValue *AllOneMask = Plan.getTrue ();
26152627 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion ();
26162628 VPBasicBlock *Header = LoopRegion->getEntryBasicBlock ();
26172629
@@ -2671,7 +2683,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
26712683 ConstantInt::getSigned (Type::getInt32Ty (Plan.getContext ()), -1 ));
26722684 VPWidenIntrinsicRecipe *VPSplice = new VPWidenIntrinsicRecipe (
26732685 Intrinsic::experimental_vp_splice,
2674- {V1, V2, Imm, AllOneMask , PrevEVL, &EVL},
2686+ {V1, V2, Imm, Plan. getTrue () , PrevEVL, &EVL},
26752687 TypeInfo.inferScalarType (R.getVPSingleValue ()), R.getDebugLoc ());
26762688 VPSplice->insertBefore (&R);
26772689 R.getVPSingleValue ()->replaceAllUsesWith (VPSplice);
@@ -2705,7 +2717,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
27052717 for (VPUser *U : collectUsersRecursively (EVLMask)) {
27062718 auto *CurRecipe = cast<VPRecipeBase>(U);
27072719 VPRecipeBase *EVLRecipe =
2708- optimizeMaskToEVL (EVLMask, *CurRecipe, TypeInfo, *AllOneMask, EVL);
2720+ optimizeMaskToEVL (EVLMask, *CurRecipe, TypeInfo, EVL);
27092721 if (!EVLRecipe)
27102722 continue ;
27112723
0 commit comments