@@ -2528,90 +2528,102 @@ void VPlanTransforms::addActiveLaneMask(
   HeaderMask->eraseFromParent();
 }
 
+template <typename Op0_t, typename Op1_t> struct RemoveMask_match {
+  Op0_t In;
+  Op1_t &Out;
+
+  RemoveMask_match(const Op0_t &In, Op1_t &Out) : In(In), Out(Out) {}
+
+  template <typename OpTy> bool match(OpTy *V) const {
+    if (m_Specific(In).match(V)) {
+      Out = nullptr;
+      return true;
+    }
+    if (m_LogicalAnd(m_Specific(In), m_VPValue(Out)).match(V))
+      return true;
+    return false;
+  }
+};
+
+/// Match a specific mask \p In, or a combination of it (logical-and In, Out).
+/// Returns the remaining part \p Out if so, or nullptr otherwise.
+template <typename Op0_t, typename Op1_t>
+static inline RemoveMask_match<Op0_t, Op1_t> m_RemoveMask(const Op0_t &In,
+                                                          Op1_t &Out) {
+  return RemoveMask_match<Op0_t, Op1_t>(In, Out);
+}
+
 /// Try to optimize a \p CurRecipe masked by \p HeaderMask to a corresponding
 /// EVL-based recipe without the header mask. Returns nullptr if no EVL-based
 /// recipe could be created.
 /// \p HeaderMask  Header Mask.
 /// \p CurRecipe   Recipe to be transformed.
 /// \p TypeInfo    VPlan-based type analysis.
-/// \p AllOneMask  The vector mask parameter of vector-predication intrinsics.
 /// \p EVL         The explicit vector length parameter of vector-predication
 ///                intrinsics.
 static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
                                        VPRecipeBase &CurRecipe,
-                                       VPTypeAnalysis &TypeInfo,
-                                       VPValue &AllOneMask, VPValue &EVL) {
-  // FIXME: Don't transform recipes to EVL recipes if they're not masked by the
-  // header mask.
-  auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * {
-    assert(OrigMask && "Unmasked recipe when folding tail");
-    // HeaderMask will be handled using EVL.
-    VPValue *Mask;
-    if (match(OrigMask, m_LogicalAnd(m_Specific(HeaderMask), m_VPValue(Mask))))
-      return Mask;
-    return HeaderMask == OrigMask ? nullptr : OrigMask;
-  };
+                                       VPTypeAnalysis &TypeInfo, VPValue &EVL) {
+  VPlan *Plan = CurRecipe.getParent()->getPlan();
+  VPValue *Addr, *Mask, *EndPtr;
 
   /// Adjust any end pointers so that they point to the end of EVL lanes not VF.
-  auto GetNewAddr = [&CurRecipe, &EVL](VPValue *Addr) -> VPValue * {
-    auto *EndPtr = dyn_cast<VPVectorEndPointerRecipe>(Addr);
-    if (!EndPtr)
-      return Addr;
-    assert(EndPtr->getOperand(1) == &EndPtr->getParent()->getPlan()->getVF() &&
-           "VPVectorEndPointerRecipe with non-VF VF operand?");
-    assert(
-        all_of(EndPtr->users(),
-               [](VPUser *U) {
-                 return cast<VPWidenMemoryRecipe>(U)->isReverse();
-               }) &&
-        "VPVectorEndPointRecipe not used by reversed widened memory recipe?");
-    VPVectorEndPointerRecipe *EVLAddr = EndPtr->clone();
-    EVLAddr->insertBefore(&CurRecipe);
-    EVLAddr->setOperand(1, &EVL);
-    return EVLAddr;
+  auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) {
+    auto *EVLEndPtr = cast<VPVectorEndPointerRecipe>(EndPtr)->clone();
+    EVLEndPtr->insertBefore(&CurRecipe);
+    EVLEndPtr->setOperand(1, &EVL);
+    return EVLEndPtr;
   };
 
-  return TypeSwitch<VPRecipeBase *, VPRecipeBase *>(&CurRecipe)
-      .Case<VPWidenLoadRecipe>([&](VPWidenLoadRecipe *L) {
-        VPValue *NewMask = GetNewMask(L->getMask());
-        VPValue *NewAddr = GetNewAddr(L->getAddr());
-        return new VPWidenLoadEVLRecipe(*L, NewAddr, EVL, NewMask);
-      })
-      .Case<VPWidenStoreRecipe>([&](VPWidenStoreRecipe *S) {
-        VPValue *NewMask = GetNewMask(S->getMask());
-        VPValue *NewAddr = GetNewAddr(S->getAddr());
-        return new VPWidenStoreEVLRecipe(*S, NewAddr, EVL, NewMask);
-      })
-      .Case<VPInterleaveRecipe>([&](VPInterleaveRecipe *IR) {
-        VPValue *NewMask = GetNewMask(IR->getMask());
-        return new VPInterleaveEVLRecipe(*IR, EVL, NewMask);
-      })
-      .Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
-        VPValue *NewMask = GetNewMask(Red->getCondOp());
-        return new VPReductionEVLRecipe(*Red, EVL, NewMask);
-      })
-      .Case<VPInstruction>([&](VPInstruction *VPI) -> VPRecipeBase * {
-        VPValue *LHS, *RHS;
-        // Transform select with a header mask condition
-        //   select(header_mask, LHS, RHS)
-        // into vector predication merge.
-        //   vp.merge(all-true, LHS, RHS, EVL)
-        if (!match(VPI, m_Select(m_Specific(HeaderMask), m_VPValue(LHS),
-                                 m_VPValue(RHS))))
-          return nullptr;
-        // Use all true as the condition because this transformation is
-        // limited to selects whose condition is a header mask.
-        return new VPWidenIntrinsicRecipe(
-            Intrinsic::vp_merge, {&AllOneMask, LHS, RHS, &EVL},
-            TypeInfo.inferScalarType(LHS), VPI->getDebugLoc());
-      })
-      .Default([&](VPRecipeBase *R) { return nullptr; });
+  if (match(&CurRecipe,
+            m_MaskedLoad(m_VPValue(Addr), m_RemoveMask(HeaderMask, Mask))) &&
+      !cast<VPWidenLoadRecipe>(CurRecipe).isReverse())
+    return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe), Addr,
+                                    EVL, Mask);
+
+  if (match(&CurRecipe,
+            m_MaskedLoad(m_VPValue(EndPtr), m_RemoveMask(HeaderMask, Mask))) &&
+      match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
+      cast<VPWidenLoadRecipe>(CurRecipe).isReverse())
+    return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe),
+                                    AdjustEndPtr(EndPtr), EVL, Mask);
+
+  if (match(&CurRecipe, m_MaskedStore(m_VPValue(Addr), m_VPValue(),
+                                      m_RemoveMask(HeaderMask, Mask))) &&
+      !cast<VPWidenStoreRecipe>(CurRecipe).isReverse())
+    return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe), Addr,
+                                     EVL, Mask);
+
+  if (match(&CurRecipe, m_MaskedStore(m_VPValue(EndPtr), m_VPValue(),
+                                      m_RemoveMask(HeaderMask, Mask))) &&
+      match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
+      cast<VPWidenStoreRecipe>(CurRecipe).isReverse())
+    return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe),
+                                     AdjustEndPtr(EndPtr), EVL, Mask);
+
+  if (auto *Rdx = dyn_cast<VPReductionRecipe>(&CurRecipe))
+    if (Rdx->isConditional() &&
+        match(Rdx->getCondOp(), m_RemoveMask(HeaderMask, Mask)))
+      return new VPReductionEVLRecipe(*Rdx, EVL, Mask);
+
+  if (auto *Interleave = dyn_cast<VPInterleaveRecipe>(&CurRecipe))
+    if (Interleave->getMask() &&
+        match(Interleave->getMask(), m_RemoveMask(HeaderMask, Mask)))
+      return new VPInterleaveEVLRecipe(*Interleave, EVL, Mask);
+
+  VPValue *LHS, *RHS;
+  if (match(&CurRecipe,
+            m_Select(m_Specific(HeaderMask), m_VPValue(LHS), m_VPValue(RHS))))
+    return new VPWidenIntrinsicRecipe(
+        Intrinsic::vp_merge, {Plan->getTrue(), LHS, RHS, &EVL},
+        TypeInfo.inferScalarType(LHS), CurRecipe.getDebugLoc());
+
+  return nullptr;
 }
 
 /// Replace recipes with their EVL variants.
 static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
   VPTypeAnalysis TypeInfo(Plan);
-  VPValue *AllOneMask = Plan.getTrue();
   VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
   VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();

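The RemoveMask_match helper introduced in this hunk does the work of the old GetNewMask lambda: given a known header mask In, it matches either In itself (binding Out to nullptr) or logical-and(In, Out) (binding Out to the remaining mask). The standalone sketch below mirrors that control flow with plain pointers standing in for VPValues; the Value struct and removeMask function are invented for illustration only and are not LLVM's API.

// Standalone illustration of the RemoveMask_match logic: strip a known mask
// In from a value V, reporting the remaining mask (or nullptr) in Out.
// Plain structs stand in for VPValue; this is not LLVM code.
#include <cassert>
#include <cstdio>

struct Value {
  Value *AndLHS = nullptr; // non-null iff this value is logical-and(AndLHS, AndRHS)
  Value *AndRHS = nullptr;
};

static bool removeMask(Value *V, Value *In, Value *&Out) {
  if (V == In) {         // the mask is exactly the header mask
    Out = nullptr;
    return true;
  }
  if (V->AndLHS == In) { // logical-and(header mask, something else)
    Out = V->AndRHS;
    return true;
  }
  return false;          // not masked by the header mask at all
}

int main() {
  Value HeaderMask, EdgeMask;
  Value Combined;
  Combined.AndLHS = &HeaderMask;
  Combined.AndRHS = &EdgeMask;

  Value *Out = nullptr;
  assert(removeMask(&HeaderMask, &HeaderMask, Out) && Out == nullptr);
  assert(removeMask(&Combined, &HeaderMask, Out) && Out == &EdgeMask);
  assert(!removeMask(&EdgeMask, &HeaderMask, Out));
  std::puts("removeMask matches the RemoveMask_match cases");
  return 0;
}
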
@@ -2671,7 +2683,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
             ConstantInt::getSigned(Type::getInt32Ty(Plan.getContext()), -1));
         VPWidenIntrinsicRecipe *VPSplice = new VPWidenIntrinsicRecipe(
             Intrinsic::experimental_vp_splice,
-            {V1, V2, Imm, AllOneMask, PrevEVL, &EVL},
+            {V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},
             TypeInfo.inferScalarType(R.getVPSingleValue()), R.getDebugLoc());
         VPSplice->insertBefore(&R);
         R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
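For context: this call builds what appears to be the EVL form of the first-order recurrence splice, and the only change in this hunk is that the all-true mask operand now comes from Plan.getTrue() instead of the deleted AllOneMask local. Below is a rough scalar model of experimental_vp_splice with an immediate of -1, based on my reading of the LangRef; the lane semantics are an assumption for illustration, not something this patch states.

// Scalar model (assumed semantics) of vp.splice(V1, V2, /*Imm=*/-1, all-true,
// PrevEVL, EVL): keep the first PrevEVL lanes of V1 and the first EVL lanes of
// V2, concatenate, then take EVL lanes starting at PrevEVL - 1, i.e. the last
// active lane of the previous iteration followed by the current lanes.
#include <cassert>
#include <vector>

static std::vector<int> vpSpliceMinusOne(const std::vector<int> &V1,
                                         const std::vector<int> &V2,
                                         unsigned PrevEVL, unsigned EVL) {
  std::vector<int> Concat(V1.begin(), V1.begin() + PrevEVL);
  Concat.insert(Concat.end(), V2.begin(), V2.begin() + EVL);
  return std::vector<int>(Concat.begin() + (PrevEVL - 1),
                          Concat.begin() + (PrevEVL - 1) + EVL);
}

int main() {
  std::vector<int> Prev = {10, 11, 12, 13}, Cur = {20, 21, 22, 23};
  // PrevEVL = 4, EVL = 3: the splice yields the last lane of Prev followed by
  // the first two lanes of Cur.
  assert((vpSpliceMinusOne(Prev, Cur, 4, 3) == std::vector<int>{13, 20, 21}));
  return 0;
}
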
@@ -2705,7 +2717,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
   for (VPUser *U : collectUsersRecursively(EVLMask)) {
     auto *CurRecipe = cast<VPRecipeBase>(U);
     VPRecipeBase *EVLRecipe =
-        optimizeMaskToEVL(EVLMask, *CurRecipe, TypeInfo, *AllOneMask, EVL);
+        optimizeMaskToEVL(EVLMask, *CurRecipe, TypeInfo, EVL);
     if (!EVLRecipe)
       continue;
 
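A closing note on the select-to-vp.merge rewrite kept in optimizeMaskToEVL above: it only fires when the select condition is the header mask, which under EVL tail folding is true exactly for the lanes below EVL, so vp.merge with an all-true mask and pivot EVL produces the same lanes. The scalar model below illustrates that reasoning; the lane-wise semantics of vp.merge are written from my reading of the LangRef, not taken from this patch.

// Lane-wise model (assumed semantics) of llvm.vp.merge: a lane takes the
// on-true operand iff it is below the pivot EVL and enabled by the mask.
#include <cassert>
#include <vector>

static std::vector<int> vpMerge(const std::vector<bool> &Mask,
                                const std::vector<int> &OnTrue,
                                const std::vector<int> &OnFalse, unsigned EVL) {
  std::vector<int> R(OnTrue.size());
  for (unsigned I = 0; I < OnTrue.size(); ++I)
    R[I] = (I < EVL && Mask[I]) ? OnTrue[I] : OnFalse[I];
  return R;
}

int main() {
  const unsigned VF = 8, EVL = 5;
  std::vector<int> LHS(VF), RHS(VF), Expected(VF);
  std::vector<bool> HeaderMask(VF), AllTrue(VF, true);
  for (unsigned I = 0; I < VF; ++I) {
    LHS[I] = int(I);
    RHS[I] = 100 + int(I);
    HeaderMask[I] = I < EVL;                       // header mask: lane < EVL
    Expected[I] = HeaderMask[I] ? LHS[I] : RHS[I]; // select(header_mask, ...)
  }
  // With an all-true mask, the pivot alone reproduces the header-mask select.
  assert(vpMerge(AllTrue, LHS, RHS, EVL) == Expected);
  return 0;
}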