Skip to content

Commit 351d398

Browse files
committed
[VPlan] Run final VPlan simplifications before codegen.
Dissolving the hierarchical VPlan CFG and converting abstract to concrete recipes can expose additional simplification opportunities. Do a final run of simplifyRecipes before executing the VPlan.
1 parent 1893caa commit 351d398

File tree

124 files changed

+659
-1147
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

124 files changed

+659
-1147
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7292,6 +7292,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
72927292
BestVPlan, VectorPH, CM.foldTailByMasking(),
72937293
CM.requiresScalarEpilogue(BestVF.isVector()));
72947294
VPlanTransforms::materializeVFAndVFxUF(BestVPlan, VectorPH, BestVF);
7295+
VPlanTransforms::simplifyRecipes(BestVPlan);
72957296

72967297
// Perform the actual loop transformation.
72977298
VPTransformState State(&TTI, BestVF, LI, DT, ILV.AC, ILV.Builder, &BestVPlan,

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
183183
case Instruction::ZExt:
184184
case Instruction::Mul:
185185
case Instruction::FMul:
186+
case VPInstruction::Broadcast:
186187
// Opcodes above can only use EVL after wide inductions have been
187188
// expanded.
188189
if (!VerifyLate) {

llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ exit:
7272
define void @powi_call(ptr %P) {
7373
; CHECK-LABEL: define void @powi_call(
7474
; CHECK-SAME: ptr [[P:%.*]]) {
75-
; CHECK-NEXT: [[ENTRY:.*]]:
75+
; CHECK-NEXT: [[ENTRY:.*:]]
7676
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
7777
; CHECK: [[VECTOR_PH]]:
7878
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -84,10 +84,9 @@ define void @powi_call(ptr %P) {
8484
; CHECK: [[MIDDLE_BLOCK]]:
8585
; CHECK-NEXT: br label %[[EXIT:.*]]
8686
; CHECK: [[SCALAR_PH]]:
87-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
8887
; CHECK-NEXT: br label %[[LOOP:.*]]
8988
; CHECK: [[LOOP]]:
90-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
89+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
9190
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds double, ptr [[P]], i64 [[IV]]
9291
; CHECK-NEXT: [[L:%.*]] = load double, ptr [[GEP]], align 8
9392
; CHECK-NEXT: [[POWI:%.*]] = tail call double @llvm.powi.f64.i32(double [[L]], i32 3)

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
1515
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
1616
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
1717
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
18-
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP1]]
19-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
18+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP1]], i64 0
2019
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
2120
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2221
; CHECK: vector.body:
@@ -90,8 +89,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
9089
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
9190
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
9291
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
93-
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP1]]
94-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
92+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP1]], i64 0
9593
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
9694
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
9795
; CHECK: vector.body:

llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
119119
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
120120
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
121121
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
122-
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP6]]
123-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
122+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0
124123
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
125124
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
126125
; CHECK: [[VECTOR_BODY]]:
@@ -235,8 +234,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
235234
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
236235
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
237236
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
238-
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP6]]
239-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
237+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0
240238
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
241239
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
242240
; CHECK: [[VECTOR_BODY]]:

llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ target triple = "aarch64-unknown-linux"
88
define void @check_widen_intrinsic_with_nnan(ptr noalias %dst.0, ptr noalias %dst.1, ptr noalias %src.1, ptr %src.2) {
99
; CHECK-LABEL: define void @check_widen_intrinsic_with_nnan(
1010
; CHECK-SAME: ptr noalias [[DST_0:%.*]], ptr noalias [[DST_1:%.*]], ptr noalias [[SRC_1:%.*]], ptr [[SRC_2:%.*]]) #[[ATTR0:[0-9]+]] {
11-
; CHECK-NEXT: [[ENTRY:.*]]:
11+
; CHECK-NEXT: [[ENTRY:.*:]]
1212
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1313
; CHECK: [[VECTOR_PH]]:
1414
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -70,10 +70,9 @@ define void @check_widen_intrinsic_with_nnan(ptr noalias %dst.0, ptr noalias %ds
7070
; CHECK: [[MIDDLE_BLOCK]]:
7171
; CHECK-NEXT: br label %[[EXIT:.*]]
7272
; CHECK: [[SCALAR_PH]]:
73-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
7473
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
7574
; CHECK: [[LOOP_HEADER]]:
76-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
75+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
7776
; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds double, ptr [[SRC_1]], i64 [[IV]]
7877
; CHECK-NEXT: [[L_1:%.*]] = load double, ptr [[GEP_SRC_1]], align 8
7978
; CHECK-NEXT: [[ABS:%.*]] = tail call nnan double @llvm.fabs.f64(double [[L_1]])

llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ target triple = "arm64-apple-macosx14.0.0"
66

77
define double @test_reduction_costs() {
88
; CHECK-LABEL: define double @test_reduction_costs() {
9-
; CHECK-NEXT: [[ENTRY:.*]]:
9+
; CHECK-NEXT: [[ENTRY:.*:]]
1010
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1111
; CHECK: [[VECTOR_PH]]:
1212
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -19,14 +19,11 @@ define double @test_reduction_costs() {
1919
; CHECK: [[MIDDLE_BLOCK]]:
2020
; CHECK-NEXT: br label %[[EXIT:.*]]
2121
; CHECK: [[SCALAR_PH]]:
22-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
23-
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ]
24-
; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ]
2522
; CHECK-NEXT: br label %[[LOOP_1:.*]]
2623
; CHECK: [[LOOP_1]]:
27-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_1]] ]
28-
; CHECK-NEXT: [[R_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[R_1_NEXT:%.*]], %[[LOOP_1]] ]
29-
; CHECK-NEXT: [[R_2:%.*]] = phi double [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[R_2_NEXT:%.*]], %[[LOOP_1]] ]
24+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_1]] ]
25+
; CHECK-NEXT: [[R_1:%.*]] = phi double [ 0.000000e+00, %[[SCALAR_PH]] ], [ [[R_1_NEXT:%.*]], %[[LOOP_1]] ]
26+
; CHECK-NEXT: [[R_2:%.*]] = phi double [ 0.000000e+00, %[[SCALAR_PH]] ], [ [[R_2_NEXT:%.*]], %[[LOOP_1]] ]
3027
; CHECK-NEXT: [[R_1_NEXT]] = fadd double [[R_1]], 3.000000e+00
3128
; CHECK-NEXT: [[R_2_NEXT]] = fadd double [[R_2]], 9.000000e+00
3229
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -171,14 +171,11 @@ define i64 @int_and_pointer_iv(ptr %start, i32 %N) {
171171
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
172172
; CHECK-NEXT: br label [[EXIT:%.*]]
173173
; CHECK: scalar.ph:
174-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ]
175-
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[START]], [[ENTRY]] ]
176-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY]] ]
177174
; CHECK-NEXT: br label [[LOOP:%.*]]
178175
; CHECK: loop:
179-
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
180-
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
181-
; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[RECUR_NEXT:%.*]], [[LOOP]] ]
176+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
177+
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], [[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
178+
; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[RECUR_NEXT:%.*]], [[LOOP]] ]
182179
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[PTR_IV]], align 4
183180
; CHECK-NEXT: [[RECUR_NEXT]] = zext i32 [[L]] to i64
184181
; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 4

llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,7 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3
6565
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
6666
; CHECK-NEXT: [[TMP26:%.*]] = mul <vscale x 2 x i64> [[TMP25]], splat (i64 1)
6767
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> [[BROADCAST_SPLAT4]], [[TMP26]]
68-
; CHECK-NEXT: [[TMP27:%.*]] = mul i64 1, [[TMP17]]
69-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP27]], i64 0
68+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP17]], i64 0
7069
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
7170
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
7271
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
@@ -201,8 +200,7 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no
201200
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
202201
; CHECK-NEXT: [[TMP38:%.*]] = mul <vscale x 2 x i64> [[TMP25]], splat (i64 1)
203202
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> [[BROADCAST_SPLAT4]], [[TMP38]]
204-
; CHECK-NEXT: [[TMP39:%.*]] = mul i64 1, [[TMP17]]
205-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP39]], i64 0
203+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP17]], i64 0
206204
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
207205
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
208206
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:

llvm/test/Transforms/LoopVectorize/AArch64/invariant-replicate-region.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ target triple = "arm64-apple-macosx14.0.0"
88
define i32 @test_invariant_replicate_region(i32 %x, i1 %c) {
99
; CHECK-LABEL: define i32 @test_invariant_replicate_region(
1010
; CHECK-SAME: i32 [[X:%.*]], i1 [[C:%.*]]) {
11-
; CHECK-NEXT: [[ENTRY:.*]]:
11+
; CHECK-NEXT: [[ENTRY:.*:]]
1212
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1313
; CHECK: [[VECTOR_PH]]:
1414
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0
@@ -52,10 +52,9 @@ define i32 @test_invariant_replicate_region(i32 %x, i1 %c) {
5252
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[PREDPHI]], i32 3
5353
; CHECK-NEXT: br label %[[EXIT:.*]]
5454
; CHECK: [[SCALAR_PH]]:
55-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
5655
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
5756
; CHECK: [[LOOP_HEADER]]:
58-
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
57+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
5958
; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
6059
; CHECK: [[THEN]]:
6160
; CHECK-NEXT: [[REM_1:%.*]] = urem i32 10, [[X]]

0 commit comments

Comments
 (0)