Skip to content

Commit c6ca1bc

Browse files
committed
[VPlan] Fix first-order splices without header mask not using EVL
This fixes a buildbot failure with EVL tail folding after llvm#144666: https://lab.llvm.org/buildbot/#/builders/132/builds/1653

For a first-order recurrence (FOR) to be correct with EVL tail folding, we need to convert its splices to vp splices that take the EVL operand. Originally we did this by looking for users of the header mask (and, recursively, their users) and converting them in createEVLRecipe.

However, after llvm#144666 a FOR might not actually use the header mask, e.g. if it is based on an induction variable, and so we wouldn't pick it up in createEVLRecipe.

This commit fixes that by converting FOR splices separately, in a loop over all recipes in the plan, regardless of whether or not they use the header mask.

I think there was some conflation in createEVLRecipe between what was an optimisation and what was needed for correctness. Most of the transforms in it just exist to optimize the mask away, and we should still emit correct code without them. So I've renamed it to optimizeMaskToEVL to make the separation clearer.
1 parent 4da5de5 commit c6ca1bc

File tree

2 files changed

+38
-30
lines changed

2 files changed

+38
-30
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 37 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2114,21 +2114,19 @@ void VPlanTransforms::addActiveLaneMask(
21142114
HeaderMask->replaceAllUsesWith(LaneMask);
21152115
}
21162116

2117-
/// Try to convert \p CurRecipe to a corresponding EVL-based recipe. Returns
2118-
/// nullptr if no EVL-based recipe could be created.
2117+
/// Try to optimize a \p CurRecipe masked by \p HeaderMask to a corresponding
2118+
/// EVL-based recipe without the mask. Returns nullptr if no EVL-based recipe
2119+
/// could be created.
21192120
/// \p HeaderMask Header Mask.
21202121
/// \p CurRecipe Recipe to be transformed.
21212122
/// \p TypeInfo VPlan-based type analysis.
21222123
/// \p AllOneMask The vector mask parameter of vector-predication intrinsics.
21232124
/// \p EVL The explicit vector length parameter of vector-predication
21242125
/// intrinsics.
2125-
/// \p PrevEVL The explicit vector length of the previous iteration. Only
2126-
/// required if \p CurRecipe is a VPInstruction::FirstOrderRecurrenceSplice.
2127-
static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
2128-
VPRecipeBase &CurRecipe,
2129-
VPTypeAnalysis &TypeInfo,
2130-
VPValue &AllOneMask, VPValue &EVL,
2131-
VPValue *PrevEVL) {
2126+
static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
2127+
VPRecipeBase &CurRecipe,
2128+
VPTypeAnalysis &TypeInfo,
2129+
VPValue &AllOneMask, VPValue &EVL) {
21322130
using namespace llvm::VPlanPatternMatch;
21332131
auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * {
21342132
assert(OrigMask && "Unmasked recipe when folding tail");
@@ -2160,18 +2158,6 @@ static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
21602158
Sel->getDebugLoc());
21612159
})
21622160
.Case<VPInstruction>([&](VPInstruction *VPI) -> VPRecipeBase * {
2163-
if (VPI->getOpcode() == VPInstruction::FirstOrderRecurrenceSplice) {
2164-
assert(PrevEVL && "Fixed-order recurrences require previous EVL");
2165-
VPValue *MinusOneVPV = VPI->getParent()->getPlan()->getOrAddLiveIn(
2166-
ConstantInt::getSigned(Type::getInt32Ty(TypeInfo.getContext()),
2167-
-1));
2168-
SmallVector<VPValue *> Ops(VPI->operands());
2169-
Ops.append({MinusOneVPV, &AllOneMask, PrevEVL, &EVL});
2170-
return new VPWidenIntrinsicRecipe(Intrinsic::experimental_vp_splice,
2171-
Ops, TypeInfo.inferScalarType(VPI),
2172-
VPI->getDebugLoc());
2173-
}
2174-
21752161
VPValue *LHS, *RHS;
21762162
// Transform select with a header mask condition
21772163
// select(header_mask, LHS, RHS)
@@ -2204,9 +2190,12 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
22042190
"User of VF that we can't transform to EVL.");
22052191
Plan.getVF().replaceAllUsesWith(&EVL);
22062192

2193+
// Defer erasing recipes till the end so that we don't invalidate the
2194+
// VPTypeAnalysis cache.
2195+
SmallVector<VPRecipeBase *> ToErase;
2196+
22072197
// Create a scalar phi to track the previous EVL if fixed-order recurrence is
22082198
// contained.
2209-
VPInstruction *PrevEVL = nullptr;
22102199
bool ContainsFORs =
22112200
any_of(Header->phis(), IsaPred<VPFirstOrderRecurrencePHIRecipe>);
22122201
if (ContainsFORs) {
@@ -2219,16 +2208,37 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
22192208
DebugLoc());
22202209

22212210
Builder.setInsertPoint(Header, Header->getFirstNonPhi());
2222-
PrevEVL = Builder.createScalarPhi({MaxEVL, &EVL}, DebugLoc(), "prev.evl");
2211+
VPValue *PrevEVL =
2212+
Builder.createScalarPhi({MaxEVL, &EVL}, DebugLoc(), "prev.evl");
2213+
2214+
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
2215+
vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
2216+
for (VPRecipeBase &R : *VPBB) {
2217+
using namespace VPlanPatternMatch;
2218+
VPValue *V1, *V2;
2219+
if (!match(&R,
2220+
m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
2221+
m_VPValue(V1), m_VPValue(V2))))
2222+
continue;
2223+
VPValue *Imm = Plan.getOrAddLiveIn(
2224+
ConstantInt::getSigned(Type::getInt32Ty(Ctx), -1));
2225+
VPWidenIntrinsicRecipe *VPSplice = new VPWidenIntrinsicRecipe(
2226+
Intrinsic::experimental_vp_splice,
2227+
{V1, V2, Imm, AllOneMask, PrevEVL, &EVL},
2228+
TypeInfo.inferScalarType(R.getVPSingleValue()), R.getDebugLoc());
2229+
VPSplice->insertBefore(&R);
2230+
R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
2231+
ToErase.push_back(&R);
2232+
}
2233+
}
22232234
}
22242235

2225-
SmallVector<VPRecipeBase *> ToErase;
2226-
2236+
// Try to optimize header mask recipes away to their EVL variants.
22272237
for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
22282238
for (VPUser *U : collectUsersRecursively(HeaderMask)) {
22292239
auto *CurRecipe = cast<VPRecipeBase>(U);
2230-
VPRecipeBase *EVLRecipe = createEVLRecipe(
2231-
HeaderMask, *CurRecipe, TypeInfo, *AllOneMask, EVL, PrevEVL);
2240+
VPRecipeBase *EVLRecipe =
2241+
optimizeMaskToEVL(HeaderMask, *CurRecipe, TypeInfo, *AllOneMask, EVL);
22322242
if (!EVLRecipe)
22332243
continue;
22342244

@@ -2244,8 +2254,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
22442254
VPValue *CurVPV = CurRecipe->getVPSingleValue();
22452255
CurVPV->replaceAllUsesWith(EVLRecipe->getVPSingleValue());
22462256
}
2247-
// Defer erasing recipes till the end so that we don't invalidate the
2248-
// VPTypeAnalysis cache.
22492257
ToErase.push_back(CurRecipe);
22502258
}
22512259
}

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -628,7 +628,7 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) {
628628
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
629629
; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
630630
; IF-EVL-NEXT: [[TMP20]] = add <vscale x 2 x i64> [[VEC_IND]], splat (i64 42)
631-
; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> [[VECTOR_RECUR]], <vscale x 2 x i64> [[TMP20]], i32 -1)
631+
; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vp.splice.nxv2i64(<vscale x 2 x i64> [[VECTOR_RECUR]], <vscale x 2 x i64> [[TMP20]], i32 -1, <vscale x 2 x i1> splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP11]])
632632
; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[EVL_BASED_IV]]
633633
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i64, ptr [[TMP9]], i32 0
634634
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP15]], ptr align 8 [[TMP17]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])

0 commit comments

Comments
 (0)