Skip to content

Commit 74591cd

Browse files
committed
Step 1: Create scalar phi to save previous EVL
1 parent 65bd025 commit 74591cd

File tree

4 files changed

+35
-4
lines changed

4 files changed

+35
-4
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
535535
case VPRecipeBase::VPWidenPointerInductionSC:
536536
case VPRecipeBase::VPReductionPHISC:
537537
case VPRecipeBase::VPScalarCastSC:
538+
case VPRecipeBase::VPScalarPHISC:
538539
case VPRecipeBase::VPPartialReductionSC:
539540
return true;
540541
case VPRecipeBase::VPBranchOnMaskSC:

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1727,6 +1727,29 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
17271727
VPTypeAnalysis TypeInfo(CanonicalIVType);
17281728
LLVMContext &Ctx = CanonicalIVType->getContext();
17291729
VPValue *AllOneMask = Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx));
1730+
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
1731+
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
1732+
1733+
// Create a scalar phi to track the previous EVL if the loop header contains
1734+
// a fixed-order recurrence.
1735+
bool ContainsFORs =
1736+
any_of(Header->phis(), IsaPred<VPFirstOrderRecurrencePHIRecipe>);
1737+
if (ContainsFORs) {
1738+
// TODO: Use VPInstruction::ExplicitVectorLength to get maximum EVL.
1739+
VPValue *MaxEVL = &Plan.getVF();
1740+
// Emit VPScalarCastRecipe in preheader if VF is not a 32-bit integer.
1741+
if (unsigned VFSize =
1742+
TypeInfo.inferScalarType(MaxEVL)->getScalarSizeInBits();
1743+
VFSize != 32) {
1744+
MaxEVL = new VPScalarCastRecipe(
1745+
VFSize > 32 ? Instruction::Trunc : Instruction::ZExt, MaxEVL,
1746+
Type::getInt32Ty(Ctx), DebugLoc());
1747+
VPBasicBlock *Preheader = LoopRegion->getPreheaderVPBB();
1748+
Preheader->appendRecipe(cast<VPScalarCastRecipe>(MaxEVL));
1749+
}
1750+
auto *PrevEVL = new VPScalarPHIRecipe(MaxEVL, &EVL, DebugLoc(), "prev.evl");
1751+
PrevEVL->insertBefore(*Header, Header->getFirstNonPhi());
1752+
}
17301753

17311754
for (VPUser *U : to_vector(Plan.getVF().users())) {
17321755
if (auto *R = dyn_cast<VPReverseVectorPointerRecipe>(U))

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,8 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
143143
})
144144
.Case<VPWidenStoreEVLRecipe, VPReductionEVLRecipe>(
145145
[&](const VPRecipeBase *S) { return VerifyEVLUse(*S, 2); })
146-
.Case<VPWidenLoadEVLRecipe, VPReverseVectorPointerRecipe>(
146+
.Case<VPWidenLoadEVLRecipe, VPReverseVectorPointerRecipe,
147+
VPScalarPHIRecipe>(
147148
[&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); })
148149
.Case<VPWidenEVLRecipe>([&](const VPWidenEVLRecipe *W) {
149150
return VerifyEVLUse(*W,

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
2929
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
3030
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
3131
; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
32+
; IF-EVL-NEXT: [[TMP25:%.*]] = trunc i64 [[TMP8]] to i32
3233
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
3334
; IF-EVL-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4
3435
; IF-EVL-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], 1
@@ -38,8 +39,9 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
3839
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
3940
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
4041
; IF-EVL-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[VP_OP_LOAD:%.*]], %[[VECTOR_BODY]] ]
42+
; IF-EVL-NEXT: [[PREV_EVL:%.*]] = phi i32 [ [[TMP25]], %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
4143
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[TC]], [[EVL_BASED_IV]]
42-
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
44+
; IF-EVL-NEXT: [[TMP12]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
4345
; IF-EVL-NEXT: [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0
4446
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP13]]
4547
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP14]], i32 0
@@ -174,6 +176,7 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
174176
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
175177
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
176178
; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
179+
; IF-EVL-NEXT: [[TMP32:%.*]] = trunc i64 [[TMP8]] to i32
177180
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
178181
; IF-EVL-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4
179182
; IF-EVL-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], 1
@@ -188,8 +191,9 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
188191
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
189192
; IF-EVL-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[VP_OP_LOAD:%.*]], %[[VECTOR_BODY]] ]
190193
; IF-EVL-NEXT: [[VECTOR_RECUR2:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT1]], %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
194+
; IF-EVL-NEXT: [[PREV_EVL:%.*]] = phi i32 [ [[TMP32]], %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
191195
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[TC]], [[EVL_BASED_IV]]
192-
; IF-EVL-NEXT: [[TMP15:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
196+
; IF-EVL-NEXT: [[TMP15]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
193197
; IF-EVL-NEXT: [[TMP16:%.*]] = add i64 [[EVL_BASED_IV]], 0
194198
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP16]]
195199
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP17]], i32 0
@@ -344,6 +348,7 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
344348
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
345349
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
346350
; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
351+
; IF-EVL-NEXT: [[TMP39:%.*]] = trunc i64 [[TMP8]] to i32
347352
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
348353
; IF-EVL-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4
349354
; IF-EVL-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], 1
@@ -363,8 +368,9 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
363368
; IF-EVL-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[VP_OP_LOAD:%.*]], %[[VECTOR_BODY]] ]
364369
; IF-EVL-NEXT: [[VECTOR_RECUR2:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT1]], %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
365370
; IF-EVL-NEXT: [[VECTOR_RECUR4:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT3]], %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ]
371+
; IF-EVL-NEXT: [[PREV_EVL:%.*]] = phi i32 [ [[TMP39]], %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
366372
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[TC]], [[EVL_BASED_IV]]
367-
; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
373+
; IF-EVL-NEXT: [[TMP18]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
368374
; IF-EVL-NEXT: [[TMP19:%.*]] = add i64 [[EVL_BASED_IV]], 0
369375
; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP19]]
370376
; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP20]], i32 0

0 commit comments

Comments
 (0)