Skip to content

Commit 4dc989d

Browse files
committed
Update users
1 parent 339711f commit 4dc989d

15 files changed

+120
-143
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 35 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -8442,56 +8442,11 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
84428442
}
84438443
}
84448444

8445-
// Collect VPIRInstructions for phis in the exit block from the latch only.
8446-
static SetVector<VPIRInstruction *> collectUsersInLatchExitBlock(VPlan &Plan) {
8447-
SetVector<VPIRInstruction *> ExitUsersToFix;
8448-
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
8449-
8450-
if (ExitVPBB->getSinglePredecessor() != Plan.getMiddleBlock())
8451-
continue;
8452-
8453-
for (VPRecipeBase &R : ExitVPBB->phis()) {
8454-
auto *ExitIRI = cast<VPIRPhi>(&R);
8455-
assert(ExitIRI->getNumOperands() == 1 && "must have a single operand");
8456-
VPValue *V = ExitIRI->getOperand(0);
8457-
if (V->isLiveIn())
8458-
continue;
8459-
assert(V->getDefiningRecipe()->getParent()->getEnclosingLoopRegion() &&
8460-
"Only recipes defined inside a region should need fixing.");
8461-
ExitUsersToFix.insert(ExitIRI);
8462-
}
8463-
}
8464-
return ExitUsersToFix;
8465-
}
8466-
8467-
// Add exit values to \p Plan. Extracts are added for each entry in \p
8468-
// ExitUsersToFix if needed and their operands are updated.
8469-
static void
8470-
addUsersInExitBlocks(VPlan &Plan,
8471-
const SetVector<VPIRInstruction *> &ExitUsersToFix) {
8472-
if (ExitUsersToFix.empty())
8473-
return;
8474-
8475-
auto *MiddleVPBB = Plan.getMiddleBlock();
8476-
VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
8477-
8478-
// Introduce extract for exiting values and update the VPIRInstructions
8479-
// modeling the corresponding LCSSA phis.
8480-
for (VPIRInstruction *ExitIRI : ExitUsersToFix) {
8481-
assert(ExitIRI->getNumOperands() == 1 &&
8482-
ExitIRI->getParent()->getSinglePredecessor() == MiddleVPBB &&
8483-
"exit values from early exits must be fixed when branch to "
8484-
"early-exit is added");
8485-
ExitIRI->extractLastLaneOfFirstOperand(B);
8486-
}
8487-
}
8488-
84898445
/// Handle users in the exit block for first order recurrences in the original
84908446
/// exit block. The penultimate value of recurrences is fed to their LCSSA phi
84918447
/// users in the original exit block using the VPIRInstruction wrapping to the
84928448
/// LCSSA phi.
8493-
static void addExitUsersForFirstOrderRecurrences(
8494-
VPlan &Plan, SetVector<VPIRInstruction *> &ExitUsersToFix, VFRange &Range) {
8449+
static void addExitUsersForFirstOrderRecurrences(VPlan &Plan, VFRange &Range) {
84958450
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
84968451
auto *ScalarPHVPBB = Plan.getScalarPreheader();
84978452
auto *MiddleVPBB = Plan.getMiddleBlock();
@@ -8580,23 +8535,23 @@ static void addExitUsersForFirstOrderRecurrences(
85808535
// Now update VPIRInstructions modeling LCSSA phis in the exit block.
85818536
// Extract the penultimate value of the recurrence and use it as operand for
85828537
// the VPIRInstruction modeling the phi.
8583-
for (VPIRInstruction *ExitIRI : ExitUsersToFix) {
8584-
if (ExitIRI->getOperand(0) != FOR)
8538+
for (VPUser *U : to_vector(FOR->users())) {
8539+
using namespace llvm::VPlanPatternMatch;
8540+
if (!match(U, m_VPInstruction<VPInstruction::ExtractLastElement>(
8541+
m_Specific(FOR))))
85858542
continue;
85868543
// For VF vscale x 1, if vscale = 1, we are unable to extract the
8587-
// penultimate value of the recurrence. Instead, we rely on function
8588-
// addUsersInExitBlocks to extract the last element from the result of
8589-
// VPInstruction::FirstOrderRecurrenceSplice by leaving the user of the
8590-
// recurrence phi in ExitUsersToFix.
8544+
// penultimate value of the recurrence. Instead we rely on the existing
8545+
// extract of the last element from the result of
8546+
// VPInstruction::FirstOrderRecurrenceSplice.
85918547
// TODO: Consider vscale_range info and UF.
85928548
if (LoopVectorizationPlanner::getDecisionAndClampRange(IsScalableOne,
85938549
Range))
85948550
return;
85958551
VPValue *PenultimateElement = MiddleBuilder.createNaryOp(
85968552
VPInstruction::ExtractPenultimateElement, {FOR->getBackedgeValue()},
85978553
{}, "vector.recur.extract.for.phi");
8598-
ExitIRI->setOperand(0, PenultimateElement);
8599-
ExitUsersToFix.remove(ExitIRI);
8554+
cast<VPInstruction>(U)->replaceAllUsesWith(PenultimateElement);
86008555
}
86018556
}
86028557
}
@@ -8631,6 +8586,28 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
86318586
Legal->hasUncountableEarlyExit(), Range);
86328587
VPlanTransforms::createLoopRegions(*Plan);
86338588

8589+
for (auto *EB : Plan->getExitBlocks()) {
8590+
auto *MiddleVPBB = Plan->getMiddleBlock();
8591+
VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
8592+
8593+
if (EB->getSinglePredecessor() != Plan->getMiddleBlock())
8594+
continue;
8595+
8596+
for (auto &P : EB->phis()) {
8597+
auto *ExitIRI = cast<VPIRPhi>(&P);
8598+
for (unsigned Idx = 0; Idx != ExitIRI->getNumIncoming(); ++Idx) {
8599+
VPRecipeBase *Inc = ExitIRI->getIncomingValue(Idx)->getDefiningRecipe();
8600+
if (!Inc || !Inc->getParent()->getParent())
8601+
continue;
8602+
assert(ExitIRI->getNumOperands() == 1 &&
8603+
ExitIRI->getParent()->getSinglePredecessor() == MiddleVPBB &&
8604+
"exit values from early exits must be fixed when branch to "
8605+
"early-exit is added");
8606+
ExitIRI->extractLastLaneOfFirstOperand(B);
8607+
}
8608+
}
8609+
}
8610+
86348611
// Don't use getDecisionAndClampRange here, because we don't know the UF
86358612
// so this function is better to be conservative, rather than to split
86368613
// it up into different VPlans.
@@ -8802,12 +8779,12 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
88028779
R->setOperand(1, WideIV->getStepValue());
88038780
}
88048781

8782+
addExitUsersForFirstOrderRecurrences(*Plan, Range);
88058783
DenseMap<VPValue *, VPValue *> IVEndValues;
88068784
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
8807-
SetVector<VPIRInstruction *> ExitUsersToFix =
8808-
collectUsersInLatchExitBlock(*Plan);
8809-
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix, Range);
8810-
addUsersInExitBlocks(*Plan, ExitUsersToFix);
8785+
8786+
if (CM.foldTailByMasking()) {
8787+
}
88118788

88128789
// ---------------------------------------------------------------------------
88138790
// Transform initial VPlan: Apply previously taken decisions, in order, to

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,8 @@ define i64 @pointer_induction_only(ptr %start, ptr %end) {
114114
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
115115
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
116116
; CHECK: middle.block:
117-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
118117
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1
118+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
119119
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
120120
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
121121
; CHECK: scalar.ph:
@@ -170,8 +170,8 @@ define i64 @int_and_pointer_iv(ptr %start, i32 %N) {
170170
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
171171
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
172172
; CHECK: middle.block:
173-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
174173
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
174+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
175175
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
176176
; CHECK: scalar.ph:
177177
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
@@ -319,8 +319,8 @@ define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) {
319319
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
320320
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
321321
; CHECK: middle.block:
322-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP10]], i32 2
323322
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP10]], i32 3
323+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP10]], i32 2
324324
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
325325
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
326326
; CHECK: scalar.ph:

llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -844,8 +844,8 @@ define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture
844844
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
845845
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
846846
; CHECK: middle.block:
847-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i32> [[TMP5]], i32 14
848847
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP5]], i32 15
848+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i32> [[TMP5]], i32 14
849849
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
850850
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
851851
; CHECK: scalar.ph:

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -504,8 +504,8 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) {
504504
; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
505505
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
506506
; CHECK-INTERLEAVE1: middle.block:
507-
; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = extractelement <16 x i32> [[TMP9]], i32 15
508507
; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP7]], i32 15
508+
; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = extractelement <16 x i32> [[TMP9]], i32 15
509509
; CHECK-INTERLEAVE1-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
510510
; CHECK-INTERLEAVE1: scalar.ph:
511511
;
@@ -533,8 +533,8 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) {
533533
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
534534
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
535535
; CHECK-INTERLEAVED: middle.block:
536-
; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = extractelement <16 x i32> [[TMP9]], i32 15
537536
; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP7]], i32 15
537+
; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = extractelement <16 x i32> [[TMP9]], i32 15
538538
; CHECK-INTERLEAVED-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
539539
; CHECK-INTERLEAVED: scalar.ph:
540540
;
@@ -562,8 +562,8 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) {
562562
; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
563563
; CHECK-MAXBW-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
564564
; CHECK-MAXBW: middle.block:
565-
; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = extractelement <16 x i32> [[TMP9]], i32 15
566565
; CHECK-MAXBW-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP7]], i32 15
566+
; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = extractelement <16 x i32> [[TMP9]], i32 15
567567
; CHECK-MAXBW-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
568568
; CHECK-MAXBW: scalar.ph:
569569
;

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -868,11 +868,11 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 {
868868
; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = call i32 @llvm.vscale.i32()
869869
; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = mul nuw i32 [[TMP20]], 8
870870
; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = sub i32 [[TMP21]], 1
871-
; CHECK-INTERLEAVE1-NEXT: [[TMP23:%.*]] = extractelement <vscale x 8 x i32> [[TMP18]], i32 [[TMP22]]
871+
; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> [[TMP16]], i32 [[TMP22]]
872872
; CHECK-INTERLEAVE1-NEXT: [[TMP24:%.*]] = call i32 @llvm.vscale.i32()
873873
; CHECK-INTERLEAVE1-NEXT: [[TMP25:%.*]] = mul nuw i32 [[TMP24]], 8
874874
; CHECK-INTERLEAVE1-NEXT: [[TMP26:%.*]] = sub i32 [[TMP25]], 1
875-
; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> [[TMP16]], i32 [[TMP26]]
875+
; CHECK-INTERLEAVE1-NEXT: [[TMP27:%.*]] = extractelement <vscale x 8 x i32> [[TMP18]], i32 [[TMP26]]
876876
; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
877877
; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
878878
; CHECK-INTERLEAVE1: scalar.ph:
@@ -922,11 +922,11 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 {
922922
; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = call i32 @llvm.vscale.i32()
923923
; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = mul nuw i32 [[TMP29]], 8
924924
; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = sub i32 [[TMP30]], 1
925-
; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = extractelement <vscale x 8 x i32> [[TMP27]], i32 [[TMP31]]
925+
; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> [[TMP25]], i32 [[TMP31]]
926926
; CHECK-INTERLEAVED-NEXT: [[TMP33:%.*]] = call i32 @llvm.vscale.i32()
927927
; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = mul nuw i32 [[TMP33]], 8
928928
; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = sub i32 [[TMP34]], 1
929-
; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> [[TMP25]], i32 [[TMP35]]
929+
; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = extractelement <vscale x 8 x i32> [[TMP27]], i32 [[TMP35]]
930930
; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
931931
; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
932932
; CHECK-INTERLEAVED: scalar.ph:
@@ -970,11 +970,11 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 {
970970
; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = call i32 @llvm.vscale.i32()
971971
; CHECK-MAXBW-NEXT: [[TMP21:%.*]] = mul nuw i32 [[TMP20]], 8
972972
; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = sub i32 [[TMP21]], 1
973-
; CHECK-MAXBW-NEXT: [[TMP29:%.*]] = extractelement <vscale x 8 x i32> [[TMP27]], i32 [[TMP22]]
973+
; CHECK-MAXBW-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> [[TMP25]], i32 [[TMP22]]
974974
; CHECK-MAXBW-NEXT: [[TMP24:%.*]] = call i32 @llvm.vscale.i32()
975975
; CHECK-MAXBW-NEXT: [[TMP30:%.*]] = mul nuw i32 [[TMP24]], 8
976976
; CHECK-MAXBW-NEXT: [[TMP31:%.*]] = sub i32 [[TMP30]], 1
977-
; CHECK-MAXBW-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> [[TMP25]], i32 [[TMP31]]
977+
; CHECK-MAXBW-NEXT: [[TMP29:%.*]] = extractelement <vscale x 8 x i32> [[TMP27]], i32 [[TMP31]]
978978
; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
979979
; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
980980
; CHECK-MAXBW: scalar.ph:

llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@ define i64 @pr97452_scalable_vf1_for(ptr %src, ptr noalias %dst) #0 {
3636
; CHECK: [[MIDDLE_BLOCK]]:
3737
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32()
3838
; CHECK-NEXT: [[TMP12:%.*]] = sub i32 [[TMP11]], 1
39-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <vscale x 1 x i64> [[TMP7]], i32 [[TMP12]]
39+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 1 x i64> [[WIDE_LOAD]], i32 [[TMP12]]
4040
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
4141
; CHECK-NEXT: [[TMP15:%.*]] = sub i32 [[TMP14]], 1
42-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 1 x i64> [[WIDE_LOAD]], i32 [[TMP15]]
42+
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <vscale x 1 x i64> [[TMP7]], i32 [[TMP15]]
4343
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 23, [[N_VEC]]
4444
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
4545
; CHECK: [[SCALAR_PH]]:
@@ -57,7 +57,7 @@ define i64 @pr97452_scalable_vf1_for(ptr %src, ptr noalias %dst) #0 {
5757
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 22
5858
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
5959
; CHECK: [[EXIT]]:
60-
; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[FOR]], %[[LOOP]] ], [ [[TMP13]], %[[MIDDLE_BLOCK]] ]
60+
; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[FOR]], %[[LOOP]] ], [ [[TMP16]], %[[MIDDLE_BLOCK]] ]
6161
; CHECK-NEXT: ret i64 [[RES]]
6262
;
6363
entry:

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -546,17 +546,17 @@ define i32 @FOR_reduction(ptr noalias %A, ptr noalias %B, i64 %TC) {
546546
; NO-VP: [[MIDDLE_BLOCK]]:
547547
; NO-VP-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
548548
; NO-VP-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 4
549-
; NO-VP-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 2
549+
; NO-VP-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1
550550
; NO-VP-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <vscale x 4 x i32> [[WIDE_LOAD]], i32 [[TMP19]]
551551
; NO-VP-NEXT: [[TMP20:%.*]] = call i32 @llvm.vscale.i32()
552552
; NO-VP-NEXT: [[TMP21:%.*]] = mul nuw i32 [[TMP20]], 4
553-
; NO-VP-NEXT: [[TMP22:%.*]] = sub i32 [[TMP21]], 1
553+
; NO-VP-NEXT: [[TMP22:%.*]] = sub i32 [[TMP21]], 2
554554
; NO-VP-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i32> [[WIDE_LOAD]], i32 [[TMP22]]
555555
; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TC]], [[N_VEC]]
556556
; NO-VP-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
557557
; NO-VP: [[SCALAR_PH]]:
558558
; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
559-
; NO-VP-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
559+
; NO-VP-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
560560
; NO-VP-NEXT: br label %[[FOR_BODY:.*]]
561561
; NO-VP: [[FOR_BODY]]:
562562
; NO-VP-NEXT: [[INDVARS:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], %[[FOR_BODY]] ]
@@ -570,7 +570,7 @@ define i32 @FOR_reduction(ptr noalias %A, ptr noalias %B, i64 %TC) {
570570
; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_NEXT]], [[TC]]
571571
; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
572572
; NO-VP: [[FOR_END]]:
573-
; NO-VP-NEXT: [[FOR1_LCSSA:%.*]] = phi i32 [ [[FOR1]], %[[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
573+
; NO-VP-NEXT: [[FOR1_LCSSA:%.*]] = phi i32 [ [[FOR1]], %[[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ]
574574
; NO-VP-NEXT: ret i32 [[FOR1_LCSSA]]
575575
;
576576
entry:

llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -416,8 +416,8 @@ define i16 @iv_and_step_trunc() {
416416
; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], splat (i16 2)
417417
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
418418
; CHECK: middle.block:
419-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i16> [[TMP2]], i32 0
420419
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i16> [[TMP2]], i32 1
420+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i16> [[TMP2]], i32 0
421421
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
422422
; CHECK: scalar.ph:
423423
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]

0 commit comments

Comments
 (0)