Skip to content

Commit f3ffd6a

Browse files
committed
[VPlan] Optimize more IV increment exit users by using a map
The current pattern matching in getOptimizableIVOf() does not cover all forms of wide IV increments, so optimization opportunities are missed for some exit users. This patch adds a map from each wide IV increment to its corresponding wide IV, which enables optimization of these previously unhandled cases.
1 parent a550fef commit f3ffd6a

File tree

7 files changed

+44
-74
lines changed

7 files changed

+44
-74
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8887,15 +8887,18 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
88878887
// Update wide induction increments to use the same step as the corresponding
88888888
// wide induction. This enables detecting induction increments directly in
88898889
// VPlan and removes redundant splats.
8890+
DenseMap<VPValue *, VPWidenInductionRecipe *> MapIVs;
88908891
for (const auto &[Phi, ID] : Legal->getInductionVars()) {
88918892
auto *IVInc = cast<Instruction>(
88928893
Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
8893-
if (IVInc->getOperand(0) != Phi || IVInc->getOpcode() != Instruction::Add)
8894-
continue;
88958894
VPWidenInductionRecipe *WideIV =
88968895
cast<VPWidenInductionRecipe>(RecipeBuilder.getRecipe(Phi));
8897-
VPRecipeBase *R = RecipeBuilder.getRecipe(IVInc);
8898-
R->setOperand(1, WideIV->getStepValue());
8896+
VPValue *V = RecipeBuilder.getVPValueOrAddLiveIn(IVInc);
8897+
if (!dyn_cast<PHINode>(IVInc))
8898+
MapIVs[V] = WideIV;
8899+
if (IVInc->getOperand(0) != Phi || IVInc->getOpcode() != Instruction::Add)
8900+
continue;
8901+
V->getDefiningRecipe()->setOperand(1, WideIV->getStepValue());
88998902
}
89008903

89018904
DenseMap<VPValue *, VPValue *> IVEndValues;
@@ -8992,7 +8995,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
89928995
VPlanTransforms::addActiveLaneMask(*Plan, ForControlFlow,
89938996
WithoutRuntimeCheck);
89948997
}
8995-
VPlanTransforms::optimizeInductionExitUsers(*Plan, IVEndValues);
8998+
VPlanTransforms::optimizeInductionExitUsers(*Plan, IVEndValues, MapIVs);
89968999

89979000
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
89989001
return Plan;

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -823,18 +823,24 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
823823

824824
/// Attempts to optimize the induction variable exit values for users in the
825825
/// exit block coming from the latch in the original scalar loop.
826-
static VPValue *
827-
optimizeLatchExitInductionUser(VPlan &Plan, VPTypeAnalysis &TypeInfo,
828-
VPBlockBase *PredVPBB, VPValue *Op,
829-
DenseMap<VPValue *, VPValue *> &EndValues) {
826+
static VPValue *optimizeLatchExitInductionUser(
827+
VPlan &Plan, VPTypeAnalysis &TypeInfo, VPBlockBase *PredVPBB, VPValue *Op,
828+
DenseMap<VPValue *, VPValue *> &EndValues,
829+
DenseMap<VPValue *, VPWidenInductionRecipe *> MapIVs) {
830830
using namespace VPlanPatternMatch;
831831

832832
VPValue *Incoming;
833833
if (!match(Op, m_VPInstruction<VPInstruction::ExtractLastElement>(
834834
m_VPValue(Incoming))))
835835
return nullptr;
836836

837-
auto *WideIV = getOptimizableIVOf(Incoming);
837+
// Truncated IV is not handled here.
838+
auto *IntOrFpIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(Incoming);
839+
if (IntOrFpIV && IntOrFpIV->getTruncInst())
840+
return nullptr;
841+
auto *WideIV = dyn_cast<VPWidenInductionRecipe>(Incoming);
842+
if (!WideIV)
843+
WideIV = MapIVs.lookup(Incoming);
838844
if (!WideIV)
839845
return nullptr;
840846

@@ -873,7 +879,8 @@ optimizeLatchExitInductionUser(VPlan &Plan, VPTypeAnalysis &TypeInfo,
873879
}
874880

875881
void VPlanTransforms::optimizeInductionExitUsers(
876-
VPlan &Plan, DenseMap<VPValue *, VPValue *> &EndValues) {
882+
VPlan &Plan, DenseMap<VPValue *, VPValue *> &EndValues,
883+
DenseMap<VPValue *, VPWidenInductionRecipe *> &MapIVs) {
877884
VPBlockBase *MiddleVPBB = Plan.getMiddleBlock();
878885
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
879886
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
@@ -883,8 +890,9 @@ void VPlanTransforms::optimizeInductionExitUsers(
883890
for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) {
884891
VPValue *Escape = nullptr;
885892
if (PredVPBB == MiddleVPBB)
886-
Escape = optimizeLatchExitInductionUser(
887-
Plan, TypeInfo, PredVPBB, ExitIRI->getOperand(Idx), EndValues);
893+
Escape = optimizeLatchExitInductionUser(Plan, TypeInfo, PredVPBB,
894+
ExitIRI->getOperand(Idx),
895+
EndValues, MapIVs);
888896
else
889897
Escape = optimizeEarlyExitInductionUser(Plan, TypeInfo, PredVPBB,
890898
ExitIRI->getOperand(Idx));

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -211,9 +211,9 @@ struct VPlanTransforms {
211211
/// If there's a single exit block, optimize its phi recipes that use exiting
212212
/// IV values by feeding them precomputed end values instead, possibly taken
213213
/// one step backwards.
214-
static void
215-
optimizeInductionExitUsers(VPlan &Plan,
216-
DenseMap<VPValue *, VPValue *> &EndValues);
214+
static void optimizeInductionExitUsers(
215+
VPlan &Plan, DenseMap<VPValue *, VPValue *> &EndValues,
216+
DenseMap<VPValue *, VPWidenInductionRecipe *> &MapIVs);
217217

218218
/// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
219219
static void materializeBroadcasts(VPlan &Plan);

llvm/test/Transforms/LoopVectorize/X86/induction-step.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,6 @@ define i16 @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
113113
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
114114
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
115115
; CHECK: middle.block:
116-
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[TMP10]], i32 3
117116
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
118117
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
119118
; CHECK: scalar.ph:
@@ -130,7 +129,7 @@ define i16 @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
130129
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
131130
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
132131
; CHECK: exit:
133-
; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i16 [ [[SUB]], [[LOOP]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
132+
; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i16 [ [[SUB]], [[LOOP]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ]
134133
; CHECK-NEXT: ret i16 [[SUB_LCSSA]]
135134
;
136135
entry:

llvm/test/Transforms/LoopVectorize/induction-step.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -368,11 +368,11 @@ define void @wide_add_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
368368
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
369369
; CHECK: [[SCALAR_PH]]:
370370
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
371-
; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i16 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
371+
; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i16 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
372372
; CHECK-NEXT: br label %[[LOOP:.*]]
373373
; CHECK: [[LOOP]]:
374374
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
375-
; CHECK-NEXT: [[IV_2:%.*]] = phi i16 [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ]
375+
; CHECK-NEXT: [[IV_2:%.*]] = phi i16 [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ]
376376
; CHECK-NEXT: [[ADD]] = add i16 [[IV_2]], [[O_1]]
377377
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[IV]]
378378
; CHECK-NEXT: store i16 [[ADD]], ptr [[GEP_DST]], align 2
@@ -439,11 +439,11 @@ define void @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
439439
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
440440
; CHECK: [[SCALAR_PH]]:
441441
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
442-
; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i16 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
442+
; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i16 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
443443
; CHECK-NEXT: br label %[[LOOP:.*]]
444444
; CHECK: [[LOOP]]:
445445
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
446-
; CHECK-NEXT: [[IV_2:%.*]] = phi i16 [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[SUB:%.*]], %[[LOOP]] ]
446+
; CHECK-NEXT: [[IV_2:%.*]] = phi i16 [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ], [ [[SUB:%.*]], %[[LOOP]] ]
447447
; CHECK-NEXT: [[SUB]] = sub i16 [[IV_2]], [[O_1]]
448448
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[IV]]
449449
; CHECK-NEXT: store i16 [[SUB]], ptr [[GEP_DST]], align 2

llvm/test/Transforms/LoopVectorize/iv_outside_user.ll

Lines changed: 10 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1095,14 +1095,11 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) {
10951095
; VEC: [[VECTOR_PH]]:
10961096
; VEC-NEXT: br label %[[VECTOR_BODY:.*]]
10971097
; VEC: [[VECTOR_BODY]]:
1098-
; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1099-
; VEC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
1100-
; VEC-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1
1098+
; VEC-NEXT: [[TMP0:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
11011099
; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP0]]
11021100
; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0
11031101
; VEC-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP2]], align 2
1104-
; VEC-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP6]]
1105-
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
1102+
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP0]], 2
11061103
; VEC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8
11071104
; VEC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
11081105
; VEC: [[MIDDLE_BLOCK]]:
@@ -1118,7 +1115,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) {
11181115
; VEC-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8
11191116
; VEC-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], {{!llvm.loop ![0-9]+}}
11201117
; VEC: [[E_EXIT]]:
1121-
; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
1118+
; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ 8, %[[MIDDLE_BLOCK]] ]
11221119
; VEC-NEXT: ret i32 [[RES]]
11231120
;
11241121
; INTERLEAVE-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification(
@@ -1136,7 +1133,6 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) {
11361133
; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP1]]
11371134
; INTERLEAVE-NEXT: store i16 0, ptr [[TMP2]], align 2
11381135
; INTERLEAVE-NEXT: store i16 0, ptr [[TMP3]], align 2
1139-
; INTERLEAVE-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP1]]
11401136
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
11411137
; INTERLEAVE-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8
11421138
; INTERLEAVE-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
@@ -1153,7 +1149,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) {
11531149
; INTERLEAVE-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8
11541150
; INTERLEAVE-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], {{!llvm.loop ![0-9]+}}
11551151
; INTERLEAVE: [[E_EXIT]]:
1156-
; INTERLEAVE-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
1152+
; INTERLEAVE-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ 8, %[[MIDDLE_BLOCK]] ]
11571153
; INTERLEAVE-NEXT: ret i32 [[RES]]
11581154
;
11591155
entry:
@@ -1182,27 +1178,20 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) {
11821178
; VEC-NEXT: [[STEP_2:%.*]] = add nsw i32 [[STEP_1]], 1
11831179
; VEC-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
11841180
; VEC: [[VECTOR_PH]]:
1185-
; VEC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP_2]], i64 0
1186-
; VEC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
11871181
; VEC-NEXT: br label %[[VECTOR_BODY:.*]]
11881182
; VEC: [[VECTOR_BODY]]:
11891183
; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1190-
; VEC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 2>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
11911184
; VEC-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], 2
11921185
; VEC-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0
11931186
; VEC-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 2
11941187
; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP0]]
11951188
; VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP1]]
11961189
; VEC-NEXT: store i16 0, ptr [[TMP2]], align 2
11971190
; VEC-NEXT: store i16 0, ptr [[TMP3]], align 2
1198-
; VEC-NEXT: [[TMP4:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 1)
1199-
; VEC-NEXT: [[TMP5:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], [[TMP4]]
12001191
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
1201-
; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
12021192
; VEC-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4
12031193
; VEC-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
12041194
; VEC: [[MIDDLE_BLOCK]]:
1205-
; VEC-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
12061195
; VEC-NEXT: br i1 true, label %[[E_EXIT:.*]], label %[[SCALAR_PH]]
12071196
; VEC: [[SCALAR_PH]]:
12081197
; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 8, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
@@ -1216,7 +1205,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) {
12161205
; VEC-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8
12171206
; VEC-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], {{!llvm.loop ![0-9]+}}
12181207
; VEC: [[E_EXIT]]:
1219-
; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
1208+
; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ 8, %[[MIDDLE_BLOCK]] ]
12201209
; VEC-NEXT: ret i32 [[RES]]
12211210
;
12221211
; INTERLEAVE-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(
@@ -1235,8 +1224,6 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) {
12351224
; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP1]]
12361225
; INTERLEAVE-NEXT: store i16 0, ptr [[TMP2]], align 2
12371226
; INTERLEAVE-NEXT: store i16 0, ptr [[TMP3]], align 2
1238-
; INTERLEAVE-NEXT: [[TMP4:%.*]] = add i32 [[TMP1]], 1
1239-
; INTERLEAVE-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP4]]
12401227
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
12411228
; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4
12421229
; INTERLEAVE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
@@ -1254,7 +1241,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) {
12541241
; INTERLEAVE-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8
12551242
; INTERLEAVE-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], {{!llvm.loop ![0-9]+}}
12561243
; INTERLEAVE: [[E_EXIT]]:
1257-
; INTERLEAVE-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
1244+
; INTERLEAVE-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ 8, %[[MIDDLE_BLOCK]] ]
12581245
; INTERLEAVE-NEXT: ret i32 [[RES]]
12591246
;
12601247
entry:
@@ -1286,16 +1273,11 @@ define i32 @iv_ext_used_outside( ptr %dst) {
12861273
; VEC-NEXT: br label %[[VECTOR_BODY:.*]]
12871274
; VEC: [[VECTOR_BODY]]:
12881275
; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1289-
; VEC-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
12901276
; VEC-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
12911277
; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[DST]], i16 [[OFFSET_IDX]]
12921278
; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 0
12931279
; VEC-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP2]], align 4
1294-
; VEC-NEXT: [[TMP5:%.*]] = add nuw nsw <2 x i16> [[VEC_IND]], splat (i16 1)
1295-
; VEC-NEXT: [[TMP8:%.*]] = extractelement <2 x i16> [[TMP5]], i32 1
1296-
; VEC-NEXT: [[TMP7:%.*]] = zext nneg i16 [[TMP8]] to i32
12971280
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
1298-
; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
12991281
; VEC-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128
13001282
; VEC-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
13011283
; VEC: [[MIDDLE_BLOCK]]:
@@ -1314,7 +1296,7 @@ define i32 @iv_ext_used_outside( ptr %dst) {
13141296
; VEC-NEXT: [[EC:%.*]] = icmp samesign ult i16 [[IV_1]], 128
13151297
; VEC-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT]], {{!llvm.loop ![0-9]+}}
13161298
; VEC: [[EXIT]]:
1317-
; VEC-NEXT: [[IV_1_EXT_LCSSA:%.*]] = phi i32 [ [[IV_1_EXT]], %[[LOOP]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
1299+
; VEC-NEXT: [[IV_1_EXT_LCSSA:%.*]] = phi i32 [ [[IV_1_EXT]], %[[LOOP]] ], [ 128, %[[MIDDLE_BLOCK]] ]
13181300
; VEC-NEXT: ret i32 [[IV_1_EXT_LCSSA]]
13191301
;
13201302
; INTERLEAVE-LABEL: define i32 @iv_ext_used_outside(
@@ -1331,8 +1313,6 @@ define i32 @iv_ext_used_outside( ptr %dst) {
13311313
; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[DST]], i16 [[TMP1]]
13321314
; INTERLEAVE-NEXT: store i32 0, ptr [[TMP2]], align 4
13331315
; INTERLEAVE-NEXT: store i32 0, ptr [[TMP3]], align 4
1334-
; INTERLEAVE-NEXT: [[TMP4:%.*]] = add nuw nsw i16 [[TMP1]], 1
1335-
; INTERLEAVE-NEXT: [[TMP5:%.*]] = zext nneg i16 [[TMP4]] to i32
13361316
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
13371317
; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128
13381318
; INTERLEAVE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
@@ -1352,7 +1332,7 @@ define i32 @iv_ext_used_outside( ptr %dst) {
13521332
; INTERLEAVE-NEXT: [[EC:%.*]] = icmp samesign ult i16 [[IV_1]], 128
13531333
; INTERLEAVE-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT]], {{!llvm.loop ![0-9]+}}
13541334
; INTERLEAVE: [[EXIT]]:
1355-
; INTERLEAVE-NEXT: [[IV_1_EXT_LCSSA:%.*]] = phi i32 [ [[IV_1_EXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
1335+
; INTERLEAVE-NEXT: [[IV_1_EXT_LCSSA:%.*]] = phi i32 [ [[IV_1_EXT]], %[[LOOP]] ], [ 128, %[[MIDDLE_BLOCK]] ]
13561336
; INTERLEAVE-NEXT: ret i32 [[IV_1_EXT_LCSSA]]
13571337
;
13581338
entry:
@@ -1386,8 +1366,6 @@ define i64 @test_iv_increment_incremented(ptr %dst) {
13861366
; VEC-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[TMP0]], i32 0
13871367
; VEC-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP1]], i32 -1
13881368
; VEC-NEXT: store <2 x i16> splat (i16 1), ptr [[TMP2]], align 2
1389-
; VEC-NEXT: [[TMP5:%.*]] = add i64 1, -1
1390-
; VEC-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 1
13911369
; VEC-NEXT: br label %[[MIDDLE_BLOCK:.*]]
13921370
; VEC: [[MIDDLE_BLOCK]]:
13931371
; VEC-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
@@ -1405,7 +1383,7 @@ define i64 @test_iv_increment_incremented(ptr %dst) {
14051383
; VEC-NEXT: [[IV_1_NEXT]] = add i64 [[IV_2_NEXT]], 1
14061384
; VEC-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], {{!llvm.loop ![0-9]+}}
14071385
; VEC: [[EXIT]]:
1408-
; VEC-NEXT: [[IV_1_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_1_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ]
1386+
; VEC-NEXT: [[IV_1_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_1_NEXT]], %[[LOOP]] ], [ 1, %[[MIDDLE_BLOCK]] ]
14091387
; VEC-NEXT: ret i64 [[IV_1_NEXT_LCSSA]]
14101388
;
14111389
; INTERLEAVE-LABEL: define i64 @test_iv_increment_incremented(
@@ -1419,8 +1397,6 @@ define i64 @test_iv_increment_incremented(ptr %dst) {
14191397
; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[DST]], i64 2
14201398
; INTERLEAVE-NEXT: store i16 1, ptr [[TMP0]], align 2
14211399
; INTERLEAVE-NEXT: store i16 1, ptr [[TMP1]], align 2
1422-
; INTERLEAVE-NEXT: [[TMP2:%.*]] = add i64 1, -1
1423-
; INTERLEAVE-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 1
14241400
; INTERLEAVE-NEXT: br label %[[MIDDLE_BLOCK:.*]]
14251401
; INTERLEAVE: [[MIDDLE_BLOCK]]:
14261402
; INTERLEAVE-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
@@ -1438,7 +1414,7 @@ define i64 @test_iv_increment_incremented(ptr %dst) {
14381414
; INTERLEAVE-NEXT: [[IV_1_NEXT]] = add i64 [[IV_2_NEXT]], 1
14391415
; INTERLEAVE-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], {{!llvm.loop ![0-9]+}}
14401416
; INTERLEAVE: [[EXIT]]:
1441-
; INTERLEAVE-NEXT: [[IV_1_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_1_NEXT]], %[[LOOP]] ], [ [[TMP3]], %[[MIDDLE_BLOCK]] ]
1417+
; INTERLEAVE-NEXT: [[IV_1_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_1_NEXT]], %[[LOOP]] ], [ 1, %[[MIDDLE_BLOCK]] ]
14421418
; INTERLEAVE-NEXT: ret i64 [[IV_1_NEXT_LCSSA]]
14431419
;
14441420
entry:

0 commit comments

Comments
 (0)