Skip to content

Commit 95a9d2c

Browse files
fhahn authored and pawosm-arm committed
[VPlan] Only execute VPExpandSCEVRecipes once and remove them (NFC).
Instead of executing the whole entry VPIRBB twice, first only execute the VPExpandSCEVRecipes and replace their uses with the expanded VPValue, which will be a live-in. This allows removing special logic in VPExpandSCEVRecipe to support executing twice and allows moving the ExpandedSCEVs map out of VPTransformState. It will also allow adding other recipes to the entry VPBB in the future.
1 parent 615cec7 commit 95a9d2c

File tree

7 files changed

+27
-37
lines changed

7 files changed

+27
-37
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7591,8 +7591,21 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
75917591

75927592
// 0. Generate SCEV-dependent code in the entry, including TripCount, before
75937593
// making any changes to the CFG.
7594-
if (!BestVPlan.getEntry()->empty())
7595-
BestVPlan.getEntry()->execute(&State);
7594+
DenseMap<const SCEV *, Value *> ExpandedSCEVs;
7595+
auto *Entry = cast<VPIRBasicBlock>(BestVPlan.getEntry());
7596+
State.Builder.SetInsertPoint(Entry->getIRBasicBlock()->getTerminator());
7597+
for (VPRecipeBase &R : make_early_inc_range(*Entry)) {
7598+
auto *ExpSCEV = dyn_cast<VPExpandSCEVRecipe>(&R);
7599+
if (!ExpSCEV)
7600+
continue;
7601+
ExpSCEV->execute(State);
7602+
ExpandedSCEVs[ExpSCEV->getSCEV()] = State.get(ExpSCEV, VPLane(0));
7603+
VPValue *Exp = BestVPlan.getOrAddLiveIn(ExpandedSCEVs[ExpSCEV->getSCEV()]);
7604+
ExpSCEV->replaceAllUsesWith(Exp);
7605+
if (BestVPlan.getTripCount() == ExpSCEV)
7606+
BestVPlan.resetTripCount(Exp);
7607+
ExpSCEV->eraseFromParent();
7608+
}
75967609

75977610
if (!ILV.getTripCount())
75987611
ILV.setTripCount(State.get(BestVPlan.getTripCount(), VPLane(0)));
@@ -7602,9 +7615,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76027615

76037616
// 1. Set up the skeleton for vectorization, including vector pre-header and
76047617
// middle block. The vector loop is created during VPlan execution.
7605-
VPBasicBlock *VectorPH =
7606-
cast<VPBasicBlock>(BestVPlan.getEntry()->getSingleSuccessor());
7607-
7618+
VPBasicBlock *VectorPH = cast<VPBasicBlock>(Entry->getSingleSuccessor());
76087619
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
76097620
if (VectorizingEpilogue)
76107621
VPlanTransforms::removeDeadRecipes(BestVPlan);
@@ -7708,7 +7719,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77087719
}
77097720
}
77107721

7711-
return State.ExpandedSCEVs;
7722+
return ExpandedSCEVs;
77127723
}
77137724

77147725
//===--------------------------------------------------------------------===//

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -379,10 +379,6 @@ struct VPTransformState {
379379
/// memchecks. The actual versioning is performed manually.
380380
LoopVersioning *LVer = nullptr;
381381

382-
/// Map SCEVs to their expanded values. Populated when executing
383-
/// VPExpandSCEVRecipes.
384-
DenseMap<const SCEV *, Value *> ExpandedSCEVs;
385-
386382
/// VPlan-based type analysis.
387383
VPTypeAnalysis TypeAnalysis;
388384
};

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3346,23 +3346,10 @@ void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
33463346

33473347
void VPExpandSCEVRecipe::execute(VPTransformState &State) {
33483348
assert(!State.Lane && "cannot be used in per-lane");
3349-
if (State.ExpandedSCEVs.contains(Expr)) {
3350-
// SCEV Expr has already been expanded, result must already be set. At the
3351-
// moment we have to execute the entry block twice (once before skeleton
3352-
// creation to get expanded SCEVs used by the skeleton and once during
3353-
// regular VPlan execution).
3354-
State.Builder.SetInsertPoint(State.CFG.VPBB2IRBB[getParent()]);
3355-
assert(State.get(this, VPLane(0)) == State.ExpandedSCEVs[Expr] &&
3356-
"Results must match");
3357-
return;
3358-
}
3359-
33603349
const DataLayout &DL = SE.getDataLayout();
33613350
SCEVExpander Exp(SE, DL, "induction", /*PreserveLCSSA=*/true);
3362-
33633351
Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
33643352
&*State.Builder.GetInsertPoint());
3365-
State.ExpandedSCEVs[Expr] = Res;
33663353
State.set(this, Res, VPLane(0));
33673354
}
33683355

llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -151,11 +151,10 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
151151
; CHECK-NEXT: Live-in ir<[[VF:%.+]]> = VF
152152
; CHECK-NEXT: Live-in ir<[[VFxUF:%.+]]>.1 = VF * UF
153153
; CHECK-NEXT: Live-in ir<[[VEC_TC:%.+]]> = vector-trip-count
154-
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
154+
; CHECK-NEXT: ir<%0> = original trip-count
155155
; CHECK-EMPTY:
156156
; CHECK-NEXT: ir-bb<for.body.preheader>:
157157
; CHECK-NEXT: IR %0 = zext i32 %n to i64
158-
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 %n to i64)
159158
; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.scevcheck>
160159
; CHECK-EMPTY:
161160
; CHECK-NEXT: ir-bb<vector.scevcheck>:
@@ -212,7 +211,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
212211
; CHECK-NEXT: Successor(s): ir-bb<middle.block>
213212
; CHECK-EMPTY:
214213
; CHECK-NEXT: ir-bb<middle.block>:
215-
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq vp<[[TC]]>, ir<[[VEC_TC]]>
214+
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%0>, ir<[[VEC_TC]]>
216215
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
217216
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup.loopexit>, ir-bb<scalar.ph>
218217
; CHECK-EMPTY:
@@ -400,11 +399,10 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
400399
; CHECK-NEXT: Live-in ir<[[VF:%.+]]> = VF
401400
; CHECK-NEXT: Live-in ir<[[VFxUF:%.+]]>.1 = VF * UF
402401
; CHECK-NEXT: Live-in ir<[[VEC_TC:%.+]]> = vector-trip-count
403-
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
402+
; CHECK-NEXT: ir<%0> = original trip-count
404403
; CHECK-EMPTY:
405404
; CHECK-NEXT: ir-bb<for.body.preheader>:
406405
; CHECK-NEXT: IR %0 = zext i32 %n to i64
407-
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 %n to i64)
408406
; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.scevcheck>
409407
; CHECK-EMPTY:
410408
; CHECK-NEXT: ir-bb<vector.scevcheck>:
@@ -461,7 +459,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
461459
; CHECK-NEXT: Successor(s): ir-bb<middle.block>
462460
; CHECK-EMPTY:
463461
; CHECK-NEXT: ir-bb<middle.block>:
464-
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq vp<[[TC]]>, ir<[[VEC_TC]]>
462+
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%0>, ir<[[VEC_TC]]>
465463
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
466464
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup.loopexit>, ir-bb<scalar.ph>
467465
; CHECK-EMPTY:

llvm/test/Transforms/LoopVectorize/pr45259.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@ define i8 @widget(ptr %arr, i8 %t9) {
1616
; CHECK-NEXT: [[T1_0_LCSSA:%.*]] = phi ptr [ [[T1_0]], [[BB6]] ]
1717
; CHECK-NEXT: [[T1_0_LCSSA4:%.*]] = phi ptr [ [[T1_0]], [[BB6]] ]
1818
; CHECK-NEXT: [[T1_0_LCSSA1:%.*]] = phi ptr [ [[T1_0]], [[BB6]] ]
19+
; CHECK-NEXT: [[T1_0_LCSSA3:%.*]] = ptrtoint ptr [[T1_0_LCSSA]] to i64
20+
; CHECK-NEXT: [[T1_0_LCSSA2:%.*]] = ptrtoint ptr [[T1_0_LCSSA4]] to i64
1921
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[ARR1]] to i32
2022
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 0, [[TMP0]]
21-
; CHECK-NEXT: [[T1_0_LCSSA3:%.*]] = ptrtoint ptr [[T1_0_LCSSA]] to i64
2223
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[T1_0_LCSSA3]] to i32
2324
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[TMP2]]
24-
; CHECK-NEXT: [[T1_0_LCSSA2:%.*]] = ptrtoint ptr [[T1_0_LCSSA4]] to i64
2525
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP3]], 4
2626
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
2727
; CHECK: vector.scevcheck:

llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,9 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
55
; CHECK: VPlan 'Final VPlan for VF={2},UF={1}' {
66
; CHECK-NEXT: Live-in ir<[[VFxUF:.+]]> = VF * UF
77
; CHECK-NEXT: Live-in ir<[[VTC:%.+]]> = vector-trip-count
8-
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
8+
; CHECK-NEXT: ir<%0> = original trip-count
99
; CHECK-EMPTY:
1010
; CHECK-NEXT: ir-bb<entry>:
11-
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV ((-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64))
1211
; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.ph>
1312
; CHECK-EMPTY:
1413
; CHECK-NEXT: ir-bb<vector.ph>:
@@ -86,7 +85,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
8685
; CHECK-NEXT: Successor(s): ir-bb<middle.block>
8786
; CHECK-EMPTY:
8887
; CHECK-NEXT: ir-bb<middle.block>:
89-
; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq vp<[[TC]]>, ir<[[VTC]]>
88+
; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<%0>, ir<[[VTC]]>
9089
; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]>
9190
; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph>
9291
; CHECK-EMPTY:

llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,10 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
6262
; CHECK: Executing best plan with VF=8, UF=2
6363
; CHECK-NEXT: VPlan 'Final VPlan for VF={8},UF={2}' {
6464
; CHECK-NEXT: Live-in ir<[[VTC:%.+]]> = vector-trip-count
65-
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
65+
; CHECK-NEXT: ir<%and> = original trip-count
6666
; CHECK-EMPTY:
6767
; CHECK-NEXT: ir-bb<entry>:
6868
; CHECK-NEXT: IR %and = and i64 %N, 15
69-
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i4 (trunc i64 %N to i4) to i64)
7069
; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.ph>
7170
; CHECK-EMPTY:
7271
; CHECK-NEXT: ir-bb<vector.ph>:
@@ -92,7 +91,7 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
9291
; CHECK-NEXT: Successor(s): ir-bb<middle.block>
9392
; CHECK-EMPTY:
9493
; CHECK-NEXT: ir-bb<middle.block>:
95-
; CHECK-NEXT: EMIT vp<[[C:%.+]]> = icmp eq vp<[[TC]]>, ir<[[VTC]]>
94+
; CHECK-NEXT: EMIT vp<[[C:%.+]]> = icmp eq ir<%and>, ir<[[VTC]]>
9695
; CHECK-NEXT: EMIT branch-on-cond vp<[[C]]>
9796
; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph>
9897
; CHECK-EMPTY:

0 commit comments

Comments
 (0)