@@ -2051,11 +2051,11 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
20512051 return LaneMaskPhi;
20522052}
20532053
2054- // / Collect all VPValues representing a header mask through the (ICMP_ULE,
2055- // / WideCanonicalIV, backedge-taken-count) pattern.
2054+ // / Collect the header mask with the pattern:
2055+ // / (ICMP_ULE, WideCanonicalIV, backedge-taken-count)
20562056// / TODO: Introduce explicit recipe for header-mask instead of searching
20572057// / for the header-mask pattern manually.
2058- static SmallVector<VPValue *> collectAllHeaderMasks (VPlan &Plan) {
2058+ static VPSingleDefRecipe * findHeaderMask (VPlan &Plan) {
20592059 SmallVector<VPValue *> WideCanonicalIVs;
20602060 auto *FoundWidenCanonicalIVUser =
20612061 find_if (Plan.getCanonicalIV ()->users (),
@@ -2079,21 +2079,22 @@ static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
20792079 WideCanonicalIVs.push_back (WidenOriginalIV);
20802080 }
20812081
2082- // Walk users of wide canonical IVs and collect to all compares of the form
2082+ // Walk users of wide canonical IVs and find the single compare of the form
20832083 // (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
2084- SmallVector<VPValue *> HeaderMasks ;
2084+ VPSingleDefRecipe *HeaderMask = nullptr ;
20852085 for (auto *Wide : WideCanonicalIVs) {
20862086 for (VPUser *U : SmallVector<VPUser *>(Wide->users ())) {
2087- auto *HeaderMask = dyn_cast<VPInstruction>(U);
2088- if (!HeaderMask || !vputils::isHeaderMask (HeaderMask , Plan))
2087+ auto *VPI = dyn_cast<VPInstruction>(U);
2088+ if (!VPI || !vputils::isHeaderMask (VPI , Plan))
20892089 continue ;
20902090
2091- assert (HeaderMask ->getOperand (0 ) == Wide &&
2091+ assert (VPI ->getOperand (0 ) == Wide &&
20922092 " WidenCanonicalIV must be the first operand of the compare" );
2093- HeaderMasks.push_back (HeaderMask);
2093+ assert (!HeaderMask && " Multiple header masks found?" );
2094+ HeaderMask = VPI;
20942095 }
20952096 }
2096- return HeaderMasks ;
2097+ return HeaderMask ;
20972098}
20982099
20992100void VPlanTransforms::addActiveLaneMask (
@@ -2109,6 +2110,7 @@ void VPlanTransforms::addActiveLaneMask(
21092110 [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
21102111 assert (FoundWidenCanonicalIVUser &&
21112112 " Must have widened canonical IV when tail folding!" );
2113+ VPSingleDefRecipe *HeaderMask = findHeaderMask (Plan);
21122114 auto *WideCanonicalIV =
21132115 cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
21142116 VPSingleDefRecipe *LaneMask;
@@ -2122,11 +2124,11 @@ void VPlanTransforms::addActiveLaneMask(
21222124 " active.lane.mask" );
21232125 }
21242126
2125- // Walk users of WideCanonicalIV and replace all compares of the form
2126- // (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with an
2127- // active-lane- mask.
2128- for (VPValue * HeaderMask : collectAllHeaderMasks (Plan))
2129- HeaderMask->replaceAllUsesWith (LaneMask );
2127+ // Replace the header mask found by findHeaderMask, i.e. the compare of the form
2128+ // (ICMP_ULE, WideCanonicalIV, backedge-taken-count), with an active-lane-mask,
2129+ // removing the old one to ensure there is always only a single header mask.
2130+ HeaderMask-> replaceAllUsesWith (LaneMask);
2131+ HeaderMask->eraseFromParent ( );
21302132}
21312133
21322134// / Try to optimize a \p CurRecipe masked by \p HeaderMask to a corresponding
@@ -2252,6 +2254,10 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
22522254 }
22532255 }
22542256
2257+ VPValue *HeaderMask = findHeaderMask (Plan);
2258+ if (!HeaderMask)
2259+ return ;
2260+
22552261 // Replace header masks with a mask equivalent to predicating by EVL:
22562262 //
22572263 // icmp ule widen-canonical-iv backedge-taken-count
@@ -2263,10 +2269,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
22632269 VPValue *EVLMask = Builder.createICmp (
22642270 CmpInst::ICMP_ULT,
22652271 Builder.createNaryOp (VPInstruction::StepVector, {}, EVLType), &EVL);
2266- for (VPValue *HeaderMask : collectAllHeaderMasks (Plan)) {
2267- HeaderMask->replaceAllUsesWith (EVLMask);
2268- ToErase.push_back (HeaderMask->getDefiningRecipe ());
2269- }
2272+ HeaderMask->replaceAllUsesWith (EVLMask);
2273+ ToErase.push_back (HeaderMask->getDefiningRecipe ());
22702274
22712275 // Try to optimize header mask recipes away to their EVL variants.
22722276 // TODO: Split optimizeMaskToEVL out and move into
0 commit comments