@@ -2051,11 +2051,11 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
2051
2051
return LaneMaskPhi;
2052
2052
}
2053
2053
2054
- // / Collect all VPValues representing a header mask through the (ICMP_ULE,
2055
- // / WideCanonicalIV, backedge-taken-count) pattern.
2054
+ /// Collect the header mask with the pattern:
2055
+ ///   (ICMP_ULE, WideCanonicalIV, backedge-taken-count)
2056
2056
/// TODO: Introduce explicit recipe for header-mask instead of searching
2057
2057
/// for the header-mask pattern manually.
2058
- static SmallVector<VPValue *> collectAllHeaderMasks (VPlan &Plan) {
2058
+ static VPSingleDefRecipe * findHeaderMask (VPlan &Plan) {
2059
2059
SmallVector<VPValue *> WideCanonicalIVs;
2060
2060
auto *FoundWidenCanonicalIVUser =
2061
2061
find_if (Plan.getCanonicalIV ()->users (),
@@ -2079,21 +2079,22 @@ static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
2079
2079
WideCanonicalIVs.push_back (WidenOriginalIV);
2080
2080
}
2081
2081
2082
- // Walk users of wide canonical IVs and collect to all compares of the form
2082
+ // Walk users of wide canonical IVs and find the single compare of the form
2083
2083
// (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
2084
- SmallVector<VPValue *> HeaderMasks ;
2084
+ VPSingleDefRecipe *HeaderMask = nullptr ;
2085
2085
for (auto *Wide : WideCanonicalIVs) {
2086
2086
for (VPUser *U : SmallVector<VPUser *>(Wide->users ())) {
2087
- auto *HeaderMask = dyn_cast<VPInstruction>(U);
2088
- if (!HeaderMask || !vputils::isHeaderMask (HeaderMask , Plan))
2087
+ auto *VPI = dyn_cast<VPInstruction>(U);
2088
+ if (!VPI || !vputils::isHeaderMask (VPI , Plan))
2089
2089
continue ;
2090
2090
2091
- assert (HeaderMask ->getOperand (0 ) == Wide &&
2091
+ assert (VPI ->getOperand (0 ) == Wide &&
2092
2092
" WidenCanonicalIV must be the first operand of the compare" );
2093
- HeaderMasks.push_back (HeaderMask);
2093
+ assert (!HeaderMask && " Multiple header masks found?" );
2094
+ HeaderMask = VPI;
2094
2095
}
2095
2096
}
2096
- return HeaderMasks ;
2097
+ return HeaderMask ;
2097
2098
}
2098
2099
2099
2100
void VPlanTransforms::addActiveLaneMask (
@@ -2109,6 +2110,7 @@ void VPlanTransforms::addActiveLaneMask(
2109
2110
[](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
2110
2111
assert (FoundWidenCanonicalIVUser &&
2111
2112
" Must have widened canonical IV when tail folding!" );
2113
+ VPSingleDefRecipe *HeaderMask = findHeaderMask (Plan);
2112
2114
auto *WideCanonicalIV =
2113
2115
cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
2114
2116
VPSingleDefRecipe *LaneMask;
@@ -2122,11 +2124,11 @@ void VPlanTransforms::addActiveLaneMask(
2122
2124
" active.lane.mask" );
2123
2125
}
2124
2126
2125
- // Walk users of WideCanonicalIV and replace all compares of the form
2126
- // (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with an
2127
- // active-lane- mask.
2128
- for (VPValue * HeaderMask : collectAllHeaderMasks (Plan))
2129
- HeaderMask->replaceAllUsesWith (LaneMask );
2127
+ // Replace the header mask (found above via findHeaderMask) of the form
2128
+ // (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with an active-lane-mask,
2129
+ // removing the old one to ensure there is always only a single header mask.
2130
+ HeaderMask-> replaceAllUsesWith (LaneMask);
2131
+ HeaderMask->eraseFromParent ( );
2130
2132
}
2131
2133
2132
2134
// / Try to optimize a \p CurRecipe masked by \p HeaderMask to a corresponding
@@ -2252,6 +2254,10 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
2252
2254
}
2253
2255
}
2254
2256
2257
+ VPValue *HeaderMask = findHeaderMask (Plan);
2258
+ if (!HeaderMask)
2259
+ return ;
2260
+
2255
2261
// Replace the header mask with a mask equivalent to predicating by EVL:
2256
2262
//
2257
2263
// icmp ule widen-canonical-iv backedge-taken-count
@@ -2263,10 +2269,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
2263
2269
VPValue *EVLMask = Builder.createICmp (
2264
2270
CmpInst::ICMP_ULT,
2265
2271
Builder.createNaryOp (VPInstruction::StepVector, {}, EVLType), &EVL);
2266
- for (VPValue *HeaderMask : collectAllHeaderMasks (Plan)) {
2267
- HeaderMask->replaceAllUsesWith (EVLMask);
2268
- ToErase.push_back (HeaderMask->getDefiningRecipe ());
2269
- }
2272
+ HeaderMask->replaceAllUsesWith (EVLMask);
2273
+ ToErase.push_back (HeaderMask->getDefiningRecipe ());
2270
2274
2271
2275
// Try to optimize header mask recipes away to their EVL variants.
2272
2276
// TODO: Split optimizeMaskToEVL out and move into
0 commit comments