Skip to content

Commit afe8150

Browse files
committed
[VPlan] Simplify exit user handling by generating all extracts first (NFCI)
Simplify the handling of exit users by generating all extracts first (the safe option), and have the first-order recurrence (FOR) handling optimize the extracts afterwards, similar to what is already done for reductions and inductions. NFC modulo the order of first-order recurrence extracts in the middle block.
1 parent d4a8a01 commit afe8150

17 files changed

+124
-143
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 11 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -8442,56 +8442,11 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
84428442
}
84438443
}
84448444

8445-
// Collect VPIRInstructions for phis in the exit block from the latch only.
8446-
static SetVector<VPIRInstruction *> collectUsersInLatchExitBlock(VPlan &Plan) {
8447-
SetVector<VPIRInstruction *> ExitUsersToFix;
8448-
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
8449-
8450-
if (ExitVPBB->getSinglePredecessor() != Plan.getMiddleBlock())
8451-
continue;
8452-
8453-
for (VPRecipeBase &R : ExitVPBB->phis()) {
8454-
auto *ExitIRI = cast<VPIRPhi>(&R);
8455-
assert(ExitIRI->getNumOperands() == 1 && "must have a single operand");
8456-
VPValue *V = ExitIRI->getOperand(0);
8457-
if (V->isLiveIn())
8458-
continue;
8459-
assert(V->getDefiningRecipe()->getParent()->getEnclosingLoopRegion() &&
8460-
"Only recipes defined inside a region should need fixing.");
8461-
ExitUsersToFix.insert(ExitIRI);
8462-
}
8463-
}
8464-
return ExitUsersToFix;
8465-
}
8466-
8467-
// Add exit values to \p Plan. Extracts are added for each entry in \p
8468-
// ExitUsersToFix if needed and their operands are updated.
8469-
static void
8470-
addUsersInExitBlocks(VPlan &Plan,
8471-
const SetVector<VPIRInstruction *> &ExitUsersToFix) {
8472-
if (ExitUsersToFix.empty())
8473-
return;
8474-
8475-
auto *MiddleVPBB = Plan.getMiddleBlock();
8476-
VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
8477-
8478-
// Introduce extract for exiting values and update the VPIRInstructions
8479-
// modeling the corresponding LCSSA phis.
8480-
for (VPIRInstruction *ExitIRI : ExitUsersToFix) {
8481-
assert(ExitIRI->getNumOperands() == 1 &&
8482-
ExitIRI->getParent()->getSinglePredecessor() == MiddleVPBB &&
8483-
"exit values from early exits must be fixed when branch to "
8484-
"early-exit is added");
8485-
ExitIRI->extractLastLaneOfFirstOperand(B);
8486-
}
8487-
}
8488-
84898445
/// Handle users in the exit block for first order recurrences in the original
84908446
/// exit block. The penultimate value of recurrences is fed to their LCSSA phi
84918447
/// users in the original exit block using the VPIRInstruction wrapping to the
84928448
/// LCSSA phi.
8493-
static void addExitUsersForFirstOrderRecurrences(
8494-
VPlan &Plan, SetVector<VPIRInstruction *> &ExitUsersToFix, VFRange &Range) {
8449+
static void addExitUsersForFirstOrderRecurrences(VPlan &Plan, VFRange &Range) {
84958450
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
84968451
auto *ScalarPHVPBB = Plan.getScalarPreheader();
84978452
auto *MiddleVPBB = Plan.getMiddleBlock();
@@ -8580,23 +8535,23 @@ static void addExitUsersForFirstOrderRecurrences(
85808535
// Now update VPIRInstructions modeling LCSSA phis in the exit block.
85818536
// Extract the penultimate value of the recurrence and use it as operand for
85828537
// the VPIRInstruction modeling the phi.
8583-
for (VPIRInstruction *ExitIRI : ExitUsersToFix) {
8584-
if (ExitIRI->getOperand(0) != FOR)
8538+
for (VPUser *U : FOR->users()) {
8539+
using namespace llvm::VPlanPatternMatch;
8540+
if (!match(U, m_VPInstruction<VPInstruction::ExtractLastElement>(
8541+
m_Specific(FOR))))
85858542
continue;
85868543
// For VF vscale x 1, if vscale = 1, we are unable to extract the
8587-
// penultimate value of the recurrence. Instead, we rely on function
8588-
// addUsersInExitBlocks to extract the last element from the result of
8589-
// VPInstruction::FirstOrderRecurrenceSplice by leaving the user of the
8590-
// recurrence phi in ExitUsersToFix.
8544+
// penultimate value of the recurrence. Instead we rely on the existing
8545+
// extract of the last element from the result of
8546+
// VPInstruction::FirstOrderRecurrenceSplice.
85918547
// TODO: Consider vscale_range info and UF.
85928548
if (LoopVectorizationPlanner::getDecisionAndClampRange(IsScalableOne,
85938549
Range))
85948550
return;
85958551
VPValue *PenultimateElement = MiddleBuilder.createNaryOp(
85968552
VPInstruction::ExtractPenultimateElement, {FOR->getBackedgeValue()},
85978553
{}, "vector.recur.extract.for.phi");
8598-
ExitIRI->setOperand(0, PenultimateElement);
8599-
ExitUsersToFix.remove(ExitIRI);
8554+
cast<VPInstruction>(U)->replaceAllUsesWith(PenultimateElement);
86008555
}
86018556
}
86028557
}
@@ -8630,6 +8585,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
86308585
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()),
86318586
Legal->hasUncountableEarlyExit(), Range);
86328587
VPlanTransforms::createLoopRegions(*Plan);
8588+
VPlanTransforms::createExtractsForLiveOuts(*Plan);
86338589

86348590
// Don't use getDecisionAndClampRange here, because we don't know the UF
86358591
// so this function is better to be conservative, rather than to split
@@ -8802,12 +8758,9 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
88028758
R->setOperand(1, WideIV->getStepValue());
88038759
}
88048760

8761+
addExitUsersForFirstOrderRecurrences(*Plan, Range);
88058762
DenseMap<VPValue *, VPValue *> IVEndValues;
88068763
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
8807-
SetVector<VPIRInstruction *> ExitUsersToFix =
8808-
collectUsersInLatchExitBlock(*Plan);
8809-
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix, Range);
8810-
addUsersInExitBlocks(*Plan, ExitUsersToFix);
88118764

88128765
// ---------------------------------------------------------------------------
88138766
// Transform initial VPlan: Apply previously taken decisions, in order, to

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -591,6 +591,30 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan) {
591591
TopRegion->getEntryBasicBlock()->setName("vector.body");
592592
}
593593

594+
void VPlanTransforms::createExtractsForLiveOuts(VPlan &Plan) {
595+
for (VPBasicBlock *EB : Plan.getExitBlocks()) {
596+
VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock();
597+
VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
598+
599+
if (EB->getSinglePredecessor() != Plan.getMiddleBlock())
600+
continue;
601+
602+
for (VPRecipeBase &R : EB->phis()) {
603+
auto *ExitIRI = cast<VPIRPhi>(&R);
604+
for (unsigned Idx = 0; Idx != ExitIRI->getNumIncoming(); ++Idx) {
605+
VPRecipeBase *Inc = ExitIRI->getIncomingValue(Idx)->getDefiningRecipe();
606+
if (!Inc || !Inc->getParent()->getParent())
607+
continue;
608+
assert(ExitIRI->getNumOperands() == 1 &&
609+
ExitIRI->getParent()->getSinglePredecessor() == MiddleVPBB &&
610+
"exit values from early exits must be fixed when branch to "
611+
"early-exit is added");
612+
ExitIRI->extractLastLaneOfFirstOperand(B);
613+
}
614+
}
615+
}
616+
}
617+
594618
// Likelihood of bypassing the vectorized loop due to a runtime check block,
595619
// including memory overlap checks block and wrapping/unit-stride checks block.
596620
static constexpr uint32_t CheckBypassWeights[] = {1, 127};

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@ struct VPlanTransforms {
7474
/// flat CFG into a hierarchical CFG.
7575
LLVM_ABI_FOR_TEST static void createLoopRegions(VPlan &Plan);
7676

77+
/// Creates extracts for values in \p Plan defined in a loop region and used
78+
/// outside a loop region.
79+
LLVM_ABI_FOR_TEST static void createExtractsForLiveOuts(VPlan &Plan);
80+
7781
/// Wrap runtime check block \p CheckBlock in a VPIRBB and \p Cond in a
7882
/// VPValue and connect the block to \p Plan, using the VPValue as branch
7983
/// condition.

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,8 @@ define i64 @pointer_induction_only(ptr %start, ptr %end) {
114114
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
115115
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
116116
; CHECK: middle.block:
117-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
118117
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1
118+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
119119
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
120120
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
121121
; CHECK: scalar.ph:
@@ -170,8 +170,8 @@ define i64 @int_and_pointer_iv(ptr %start, i32 %N) {
170170
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
171171
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
172172
; CHECK: middle.block:
173-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
174173
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
174+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
175175
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
176176
; CHECK: scalar.ph:
177177
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
@@ -319,8 +319,8 @@ define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) {
319319
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
320320
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
321321
; CHECK: middle.block:
322-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP10]], i32 2
323322
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP10]], i32 3
323+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP10]], i32 2
324324
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
325325
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
326326
; CHECK: scalar.ph:

llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -844,8 +844,8 @@ define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture
844844
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
845845
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
846846
; CHECK: middle.block:
847-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i32> [[TMP5]], i32 14
848847
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP5]], i32 15
848+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i32> [[TMP5]], i32 14
849849
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
850850
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
851851
; CHECK: scalar.ph:

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -504,8 +504,8 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) {
504504
; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
505505
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
506506
; CHECK-INTERLEAVE1: middle.block:
507-
; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = extractelement <16 x i32> [[TMP9]], i32 15
508507
; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP7]], i32 15
508+
; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = extractelement <16 x i32> [[TMP9]], i32 15
509509
; CHECK-INTERLEAVE1-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
510510
; CHECK-INTERLEAVE1: scalar.ph:
511511
;
@@ -533,8 +533,8 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) {
533533
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
534534
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
535535
; CHECK-INTERLEAVED: middle.block:
536-
; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = extractelement <16 x i32> [[TMP9]], i32 15
537536
; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP7]], i32 15
537+
; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = extractelement <16 x i32> [[TMP9]], i32 15
538538
; CHECK-INTERLEAVED-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
539539
; CHECK-INTERLEAVED: scalar.ph:
540540
;
@@ -562,8 +562,8 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) {
562562
; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
563563
; CHECK-MAXBW-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
564564
; CHECK-MAXBW: middle.block:
565-
; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = extractelement <16 x i32> [[TMP9]], i32 15
566565
; CHECK-MAXBW-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP7]], i32 15
566+
; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = extractelement <16 x i32> [[TMP9]], i32 15
567567
; CHECK-MAXBW-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
568568
; CHECK-MAXBW: scalar.ph:
569569
;

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -868,11 +868,11 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 {
868868
; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = call i32 @llvm.vscale.i32()
869869
; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = mul nuw i32 [[TMP20]], 8
870870
; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = sub i32 [[TMP21]], 1
871-
; CHECK-INTERLEAVE1-NEXT: [[TMP23:%.*]] = extractelement <vscale x 8 x i32> [[TMP18]], i32 [[TMP22]]
871+
; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> [[TMP16]], i32 [[TMP22]]
872872
; CHECK-INTERLEAVE1-NEXT: [[TMP24:%.*]] = call i32 @llvm.vscale.i32()
873873
; CHECK-INTERLEAVE1-NEXT: [[TMP25:%.*]] = mul nuw i32 [[TMP24]], 8
874874
; CHECK-INTERLEAVE1-NEXT: [[TMP26:%.*]] = sub i32 [[TMP25]], 1
875-
; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> [[TMP16]], i32 [[TMP26]]
875+
; CHECK-INTERLEAVE1-NEXT: [[TMP27:%.*]] = extractelement <vscale x 8 x i32> [[TMP18]], i32 [[TMP26]]
876876
; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
877877
; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
878878
; CHECK-INTERLEAVE1: scalar.ph:
@@ -922,11 +922,11 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 {
922922
; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = call i32 @llvm.vscale.i32()
923923
; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = mul nuw i32 [[TMP29]], 8
924924
; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = sub i32 [[TMP30]], 1
925-
; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = extractelement <vscale x 8 x i32> [[TMP27]], i32 [[TMP31]]
925+
; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> [[TMP25]], i32 [[TMP31]]
926926
; CHECK-INTERLEAVED-NEXT: [[TMP33:%.*]] = call i32 @llvm.vscale.i32()
927927
; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = mul nuw i32 [[TMP33]], 8
928928
; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = sub i32 [[TMP34]], 1
929-
; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> [[TMP25]], i32 [[TMP35]]
929+
; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = extractelement <vscale x 8 x i32> [[TMP27]], i32 [[TMP35]]
930930
; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
931931
; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
932932
; CHECK-INTERLEAVED: scalar.ph:
@@ -970,11 +970,11 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 {
970970
; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = call i32 @llvm.vscale.i32()
971971
; CHECK-MAXBW-NEXT: [[TMP21:%.*]] = mul nuw i32 [[TMP20]], 8
972972
; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = sub i32 [[TMP21]], 1
973-
; CHECK-MAXBW-NEXT: [[TMP29:%.*]] = extractelement <vscale x 8 x i32> [[TMP27]], i32 [[TMP22]]
973+
; CHECK-MAXBW-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> [[TMP25]], i32 [[TMP22]]
974974
; CHECK-MAXBW-NEXT: [[TMP24:%.*]] = call i32 @llvm.vscale.i32()
975975
; CHECK-MAXBW-NEXT: [[TMP30:%.*]] = mul nuw i32 [[TMP24]], 8
976976
; CHECK-MAXBW-NEXT: [[TMP31:%.*]] = sub i32 [[TMP30]], 1
977-
; CHECK-MAXBW-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> [[TMP25]], i32 [[TMP31]]
977+
; CHECK-MAXBW-NEXT: [[TMP29:%.*]] = extractelement <vscale x 8 x i32> [[TMP27]], i32 [[TMP31]]
978978
; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
979979
; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
980980
; CHECK-MAXBW: scalar.ph:

llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@ define i64 @pr97452_scalable_vf1_for(ptr %src, ptr noalias %dst) #0 {
3636
; CHECK: [[MIDDLE_BLOCK]]:
3737
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32()
3838
; CHECK-NEXT: [[TMP12:%.*]] = sub i32 [[TMP11]], 1
39-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <vscale x 1 x i64> [[TMP7]], i32 [[TMP12]]
39+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 1 x i64> [[WIDE_LOAD]], i32 [[TMP12]]
4040
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
4141
; CHECK-NEXT: [[TMP15:%.*]] = sub i32 [[TMP14]], 1
42-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 1 x i64> [[WIDE_LOAD]], i32 [[TMP15]]
42+
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <vscale x 1 x i64> [[TMP7]], i32 [[TMP15]]
4343
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 23, [[N_VEC]]
4444
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
4545
; CHECK: [[SCALAR_PH]]:
@@ -57,7 +57,7 @@ define i64 @pr97452_scalable_vf1_for(ptr %src, ptr noalias %dst) #0 {
5757
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 22
5858
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
5959
; CHECK: [[EXIT]]:
60-
; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[FOR]], %[[LOOP]] ], [ [[TMP13]], %[[MIDDLE_BLOCK]] ]
60+
; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[FOR]], %[[LOOP]] ], [ [[TMP16]], %[[MIDDLE_BLOCK]] ]
6161
; CHECK-NEXT: ret i64 [[RES]]
6262
;
6363
entry:

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -546,17 +546,17 @@ define i32 @FOR_reduction(ptr noalias %A, ptr noalias %B, i64 %TC) {
546546
; NO-VP: [[MIDDLE_BLOCK]]:
547547
; NO-VP-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
548548
; NO-VP-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 4
549-
; NO-VP-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 2
549+
; NO-VP-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1
550550
; NO-VP-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <vscale x 4 x i32> [[WIDE_LOAD]], i32 [[TMP19]]
551551
; NO-VP-NEXT: [[TMP20:%.*]] = call i32 @llvm.vscale.i32()
552552
; NO-VP-NEXT: [[TMP21:%.*]] = mul nuw i32 [[TMP20]], 4
553-
; NO-VP-NEXT: [[TMP22:%.*]] = sub i32 [[TMP21]], 1
553+
; NO-VP-NEXT: [[TMP22:%.*]] = sub i32 [[TMP21]], 2
554554
; NO-VP-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i32> [[WIDE_LOAD]], i32 [[TMP22]]
555555
; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TC]], [[N_VEC]]
556556
; NO-VP-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
557557
; NO-VP: [[SCALAR_PH]]:
558558
; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
559-
; NO-VP-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
559+
; NO-VP-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
560560
; NO-VP-NEXT: br label %[[FOR_BODY:.*]]
561561
; NO-VP: [[FOR_BODY]]:
562562
; NO-VP-NEXT: [[INDVARS:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], %[[FOR_BODY]] ]
@@ -570,7 +570,7 @@ define i32 @FOR_reduction(ptr noalias %A, ptr noalias %B, i64 %TC) {
570570
; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_NEXT]], [[TC]]
571571
; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
572572
; NO-VP: [[FOR_END]]:
573-
; NO-VP-NEXT: [[FOR1_LCSSA:%.*]] = phi i32 [ [[FOR1]], %[[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
573+
; NO-VP-NEXT: [[FOR1_LCSSA:%.*]] = phi i32 [ [[FOR1]], %[[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ]
574574
; NO-VP-NEXT: ret i32 [[FOR1_LCSSA]]
575575
;
576576
entry:

llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -416,8 +416,8 @@ define i16 @iv_and_step_trunc() {
416416
; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], splat (i16 2)
417417
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
418418
; CHECK: middle.block:
419-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i16> [[TMP2]], i32 0
420419
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i16> [[TMP2]], i32 1
420+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i16> [[TMP2]], i32 0
421421
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
422422
; CHECK: scalar.ph:
423423
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]

0 commit comments

Comments
 (0)