Skip to content

Commit 0677ae6

Browse files
committed
VPlan: implement VPlan-level constant-folding
Introduce VPlanConstantFolder, a variation of ConstantFolder for VPlan, and use it in VPBuilder to constant-fold when all the underlying IR values passed into the API are constants.
1 parent 3793702 commit 0677ae6

11 files changed

+255
-137
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7790,7 +7790,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77907790
OrigLoop->getHeader()->getContext());
77917791
VPlanTransforms::materializeBroadcasts(BestVPlan);
77927792
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
7793-
VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType());
7793+
VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType(),
7794+
OrigLoop->getHeader()->getDataLayout());
77947795
VPlanTransforms::narrowInterleaveGroups(
77957796
BestVPlan, BestVF,
77967797
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
@@ -9095,7 +9096,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
90959096
if (!HasScalarVF)
90969097
VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths,
90979098
*Plan, CM.getMinimalBitwidths());
9098-
VPlanTransforms::optimize(*Plan);
9099+
VPlanTransforms::optimize(*Plan, OrigLoop->getHeader()->getDataLayout());
90999100
// TODO: try to put it close to addActiveLaneMask().
91009101
// Discard the plan if it is not EVL-compatible
91019102
if (CM.foldTailWithEVL() && !HasScalarVF &&

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1542,7 +1542,7 @@ void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF,
15421542
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
15431543
VFRange SubRange = {VF, MaxVFTimes2};
15441544
if (auto Plan = tryToBuildVPlan(SubRange)) {
1545-
VPlanTransforms::optimize(*Plan);
1545+
VPlanTransforms::optimize(*Plan, OrigLoop->getHeader()->getDataLayout());
15461546
// Update the name of the latch of the top-level vector loop region
15471547
// after optimizations which includes block folding.
15481548
Plan->getVectorLoopRegion()->getExiting()->setName("vector.latch");

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 84 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "llvm/ADT/TypeSwitch.h"
2929
#include "llvm/Analysis/IVDescriptors.h"
3030
#include "llvm/Analysis/LoopInfo.h"
31+
#include "llvm/Analysis/TargetFolder.h"
3132
#include "llvm/Analysis/VectorUtils.h"
3233
#include "llvm/IR/Intrinsics.h"
3334
#include "llvm/IR/PatternMatch.h"
@@ -939,10 +940,84 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
939940
}
940941
}
941942

943+
class VPConstantFolder {
944+
TargetFolder Folder;
945+
VPTypeAnalysis TypeInfo;
946+
947+
public:
948+
VPConstantFolder(const DataLayout &DL, const VPTypeAnalysis &TypeInfo)
949+
: Folder(DL), TypeInfo(TypeInfo) {}
950+
951+
Value *tryToConstantFold(VPRecipeBase &R, unsigned Opcode,
952+
ArrayRef<VPValue *> Operands) {
953+
SmallVector<Value *, 4> Ops;
954+
for (VPValue *Op : Operands) {
955+
if (!Op->isLiveIn() || !Op->getLiveInIRValue())
956+
return nullptr;
957+
Ops.emplace_back(Op->getLiveInIRValue());
958+
}
959+
switch (Opcode) {
960+
case Instruction::BinaryOps::Add:
961+
case Instruction::BinaryOps::Sub:
962+
case Instruction::BinaryOps::Mul:
963+
case Instruction::BinaryOps::AShr:
964+
case Instruction::BinaryOps::LShr:
965+
case Instruction::BinaryOps::And:
966+
case Instruction::BinaryOps::Or:
967+
case Instruction::BinaryOps::Xor:
968+
return Folder.FoldBinOp(static_cast<Instruction::BinaryOps>(Opcode),
969+
Ops[0], Ops[1]);
970+
case VPInstruction::LogicalAnd:
971+
return Folder.FoldSelect(Ops[0], Ops[1],
972+
ConstantInt::getNullValue(Ops[1]->getType()));
973+
case VPInstruction::Not:
974+
return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
975+
Constant::getAllOnesValue(Ops[0]->getType()));
976+
case Instruction::Select:
977+
return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
978+
case Instruction::ICmp:
979+
case Instruction::FCmp:
980+
return Folder.FoldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0],
981+
Ops[1]);
982+
case Instruction::GetElementPtr:
983+
case VPInstruction::PtrAdd:
984+
return Folder.FoldGEP(TypeInfo.inferScalarType(R.getVPSingleValue()),
985+
Ops[0], drop_begin(Ops),
986+
cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
987+
case Instruction::InsertElement:
988+
return Folder.FoldInsertElement(Ops[0], Ops[1], Ops[2]);
989+
case Instruction::ExtractElement:
990+
return Folder.FoldExtractElement(Ops[0], Ops[1]);
991+
case Instruction::CastOps::SExt:
992+
case Instruction::CastOps::ZExt:
993+
case Instruction::CastOps::Trunc:
994+
return Folder.FoldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0],
995+
TypeInfo.inferScalarType(R.getVPSingleValue()));
996+
}
997+
return nullptr;
998+
}
999+
};
1000+
9421001
/// Try to simplify recipe \p R.
943-
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
1002+
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo,
1003+
const DataLayout &DL) {
9441004
using namespace llvm::VPlanPatternMatch;
9451005

1006+
// Constant folding.
1007+
VPConstantFolder Folder(DL, TypeInfo);
1008+
if (TypeSwitch<VPRecipeBase *, bool>(&R)
1009+
.Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
1010+
VPReplicateRecipe>([&](auto *I) {
1011+
VPlan *Plan = R.getParent()->getPlan();
1012+
Value *V =
1013+
Folder.tryToConstantFold(R, I->getOpcode(), I->operands());
1014+
if (V)
1015+
R.getVPSingleValue()->replaceAllUsesWith(Plan->getOrAddLiveIn(V));
1016+
return V;
1017+
})
1018+
.Default([](auto *) { return false; }))
1019+
return;
1020+
9461021
// VPScalarIVSteps can only be simplified after unrolling. VPScalarIVSteps for
9471022
// part 0 can be replaced by their start value, if only the first lane is
9481023
// demanded.
@@ -1075,13 +1150,14 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
10751150
}
10761151
}
10771152

1078-
void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
1153+
void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy,
1154+
const DataLayout &DL) {
10791155
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
10801156
Plan.getEntry());
10811157
VPTypeAnalysis TypeInfo(&CanonicalIVTy);
10821158
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
10831159
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
1084-
simplifyRecipe(R, TypeInfo);
1160+
simplifyRecipe(R, TypeInfo, DL);
10851161
}
10861162
}
10871163
}
@@ -1342,7 +1418,8 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
13421418

13431419
VPBlockUtils::connectBlocks(Preheader, Header);
13441420
VPBlockUtils::connectBlocks(ExitingVPBB, Exit);
1345-
VPlanTransforms::simplifyRecipes(Plan, *CanIVTy);
1421+
VPlanTransforms::simplifyRecipes(Plan, *CanIVTy,
1422+
PSE.getSE()->getDataLayout());
13461423
} else {
13471424
// The vector region contains header phis for which we cannot remove the
13481425
// loop region yet.
@@ -1772,17 +1849,16 @@ static void removeBranchOnCondTrue(VPlan &Plan) {
17721849
VPBB->back().eraseFromParent();
17731850
}
17741851
}
1775-
1776-
void VPlanTransforms::optimize(VPlan &Plan) {
1852+
void VPlanTransforms::optimize(VPlan &Plan, const DataLayout &DL) {
17771853
runPass(removeRedundantCanonicalIVs, Plan);
17781854
runPass(removeRedundantInductionCasts, Plan);
17791855

1780-
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
1856+
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType(), DL);
17811857
runPass(simplifyBlends, Plan);
17821858
runPass(removeDeadRecipes, Plan);
17831859
runPass(legalizeAndOptimizeInductions, Plan);
17841860
runPass(removeRedundantExpandSCEVRecipes, Plan);
1785-
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
1861+
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType(), DL);
17861862
runPass(removeBranchOnCondTrue, Plan);
17871863
runPass(removeDeadRecipes, Plan);
17881864

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ struct VPlanTransforms {
109109
/// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe
110110
/// optimizations, dead recipe removal, replicate region optimizations and
111111
/// block merging.
112-
static void optimize(VPlan &Plan);
112+
static void optimize(VPlan &Plan, const DataLayout &DL);
113113

114114
/// Wrap predicated VPReplicateRecipes with a mask operand in an if-then
115115
/// region block and remove the mask operand. Optimize the created regions by
@@ -190,7 +190,8 @@ struct VPlanTransforms {
190190

191191
/// Perform instcombine-like simplifications on recipes in \p Plan. Use \p
192192
/// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis. \p DL
/// is the data layout used when constant-folding recipes.
193-
static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy);
193+
static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy,
194+
const DataLayout &DL);
194195

195196
/// If there's a single exit block, optimize its phi recipes that use exiting
196197
/// IV values by feeding them precomputed end values instead, possibly taken

llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll

Lines changed: 8 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,10 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 {
4747
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
4848
; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4949
; CHECK: [[MIDDLE_BLOCK]]:
50-
; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32()
51-
; CHECK-NEXT: [[TMP23:%.*]] = mul i32 [[TMP22]], 8
52-
; CHECK-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], 1
53-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i16> zeroinitializer, i32 [[TMP24]]
5450
; CHECK-NEXT: br label %[[SCALAR_PH]]
5551
; CHECK: [[SCALAR_PH]]:
5652
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
57-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
53+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
5854
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
5955
; CHECK: [[LOOP_HEADER]]:
6056
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -141,14 +137,10 @@ define void @block_with_dead_inst_2(ptr %src) #0 {
141137
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
142138
; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
143139
; CHECK: [[MIDDLE_BLOCK]]:
144-
; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32()
145-
; CHECK-NEXT: [[TMP20:%.*]] = mul i32 [[TMP19]], 4
146-
; CHECK-NEXT: [[TMP21:%.*]] = sub i32 [[TMP20]], 1
147-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i16> zeroinitializer, i32 [[TMP21]]
148140
; CHECK-NEXT: br label %[[SCALAR_PH]]
149141
; CHECK: [[SCALAR_PH]]:
150142
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
151-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
143+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
152144
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
153145
; CHECK: [[LOOP_HEADER]]:
154146
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -235,14 +227,10 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 {
235227
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
236228
; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
237229
; CHECK: [[MIDDLE_BLOCK]]:
238-
; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32()
239-
; CHECK-NEXT: [[TMP20:%.*]] = mul i32 [[TMP19]], 4
240-
; CHECK-NEXT: [[TMP21:%.*]] = sub i32 [[TMP20]], 1
241-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i16> zeroinitializer, i32 [[TMP21]]
242230
; CHECK-NEXT: br label %[[SCALAR_PH]]
243231
; CHECK: [[SCALAR_PH]]:
244232
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
245-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
233+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
246234
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
247235
; CHECK: [[LOOP_HEADER]]:
248236
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -339,14 +327,10 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 {
339327
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
340328
; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
341329
; CHECK: [[MIDDLE_BLOCK]]:
342-
; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32()
343-
; CHECK-NEXT: [[TMP23:%.*]] = mul i32 [[TMP22]], 8
344-
; CHECK-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], 1
345-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i16> zeroinitializer, i32 [[TMP24]]
346330
; CHECK-NEXT: br label %[[SCALAR_PH]]
347331
; CHECK: [[SCALAR_PH]]:
348332
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
349-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
333+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
350334
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
351335
; CHECK: [[LOOP_HEADER]]:
352336
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -445,14 +429,10 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 {
445429
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
446430
; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
447431
; CHECK: [[MIDDLE_BLOCK]]:
448-
; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32()
449-
; CHECK-NEXT: [[TMP20:%.*]] = mul i32 [[TMP19]], 4
450-
; CHECK-NEXT: [[TMP21:%.*]] = sub i32 [[TMP20]], 1
451-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i16> zeroinitializer, i32 [[TMP21]]
452432
; CHECK-NEXT: br label %[[SCALAR_PH]]
453433
; CHECK: [[SCALAR_PH]]:
454434
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
455-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
435+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
456436
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
457437
; CHECK: [[LOOP_HEADER]]:
458438
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -559,14 +539,10 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 %
559539
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
560540
; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
561541
; CHECK: [[MIDDLE_BLOCK]]:
562-
; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32()
563-
; CHECK-NEXT: [[TMP23:%.*]] = mul i32 [[TMP22]], 8
564-
; CHECK-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], 1
565-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i16> zeroinitializer, i32 [[TMP24]]
566542
; CHECK-NEXT: br label %[[SCALAR_PH]]
567543
; CHECK: [[SCALAR_PH]]:
568544
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
569-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
545+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
570546
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
571547
; CHECK: [[LOOP_HEADER]]:
572548
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -663,15 +639,11 @@ define void @empty_block_with_phi_1(ptr %src, i64 %N) #0 {
663639
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
664640
; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
665641
; CHECK: [[MIDDLE_BLOCK]]:
666-
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
667-
; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 8
668-
; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1
669-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> zeroinitializer, i32 [[TMP16]]
670642
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
671643
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
672644
; CHECK: [[SCALAR_PH]]:
673645
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
674-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
646+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
675647
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
676648
; CHECK: [[LOOP_HEADER]]:
677649
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -751,15 +723,11 @@ define void @empty_block_with_phi_2(ptr %src, i64 %N) #0 {
751723
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
752724
; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
753725
; CHECK: [[MIDDLE_BLOCK]]:
754-
; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32()
755-
; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], 8
756-
; CHECK-NEXT: [[TMP17:%.*]] = sub i32 [[TMP16]], 1
757-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> zeroinitializer, i32 [[TMP17]]
758726
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
759727
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
760728
; CHECK: [[SCALAR_PH]]:
761729
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
762-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
730+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
763731
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
764732
; CHECK: [[LOOP_HEADER]]:
765733
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]

0 commit comments

Comments
 (0)