@@ -86,7 +86,7 @@ define void @test_iv_cost(ptr %ptr.start, i8 %a, i64 %b) {
8686; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[N_VEC]]
8787; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[START]], [[N_VEC]]
8888; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
89- ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF5 :![0-9]+]]
89+ ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF4 :![0-9]+]]
9090; CHECK: [[VEC_EPILOG_PH]]:
9191; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
9292; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[START]], 4
@@ -100,6 +100,7 @@ define void @test_iv_cost(ptr %ptr.start, i8 %a, i64 %b) {
100100; CHECK-NEXT: store <4 x i8> zeroinitializer, ptr [[NEXT_GEP5]], align 1
101101; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 4
102102; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]]
103+ ; CHECK-NEXT: br i1 [[TMP5]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
103104; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
104105; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[START]], [[N_VEC3]]
105106; CHECK-NEXT: br i1 [[CMP_N7]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
@@ -114,6 +115,9 @@ define void @test_iv_cost(ptr %ptr.start, i8 %a, i64 %b) {
114115; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 1
115116; CHECK-NEXT: store i8 0, ptr [[PTR_IV]], align 1
116117; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 0
118+ ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP5]]
119+ ; CHECK: [[EXIT_LOOPEXIT]]:
120+ ; CHECK-NEXT: br label %[[EXIT]]
117121; CHECK: [[EXIT]]:
118122; CHECK-NEXT: ret void
119123;
@@ -207,22 +211,26 @@ define void @test_exit_branch_cost(ptr %dst, ptr noalias %x.ptr, ptr noalias %y.
207211; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
208212; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
209213; CHECK: [[PRED_STORE_IF]]:
214+ ; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META6:![0-9]+]], !noalias [[META9:![0-9]+]]
210215; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
211216; CHECK: [[PRED_STORE_CONTINUE]]:
212217; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
213218; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF42:.*]], label %[[PRED_STORE_CONTINUE43:.*]]
214219; CHECK: [[PRED_STORE_IF42]]:
220+ ; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META6]], !noalias [[META9]]
215221; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE43]]
216222; CHECK: [[PRED_STORE_CONTINUE43]]:
217223; CHECK-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP11]], <2 x i1> zeroinitializer
218224; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0
219225; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF44:.*]], label %[[PRED_STORE_CONTINUE45:.*]]
220226; CHECK: [[PRED_STORE_IF44]]:
227+ ; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META14:![0-9]+]], !noalias [[META15:![0-9]+]]
221228; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE45]]
222229; CHECK: [[PRED_STORE_CONTINUE45]]:
223230; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP13]], i32 1
224231; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF46:.*]], label %[[PRED_STORE_CONTINUE47:.*]]
225232; CHECK: [[PRED_STORE_IF46]]:
233+ ; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META14]], !noalias [[META15]]
226234; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE47]]
227235; CHECK: [[PRED_STORE_CONTINUE47]]:
228236; CHECK-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP2]], <2 x i1> zeroinitializer
@@ -232,17 +240,35 @@ define void @test_exit_branch_cost(ptr %dst, ptr noalias %x.ptr, ptr noalias %y.
232240; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF48:.*]], label %[[PRED_STORE_CONTINUE49:.*]]
233241; CHECK: [[PRED_STORE_IF48]]:
234242; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x i64> [[PREDPHI58]], i32 0
243+ ; CHECK-NEXT: store i64 [[TMP29]], ptr [[DST_2]], align 8, !alias.scope [[META16:![0-9]+]], !noalias [[META17:![0-9]+]]
235244; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE49]]
236245; CHECK: [[PRED_STORE_CONTINUE49]]:
237246; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP23]], i32 1
238247; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF50:.*]], label %[[PRED_STORE_CONTINUE51:.*]]
239248; CHECK: [[PRED_STORE_IF50]]:
240249; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i64> [[PREDPHI58]], i32 1
250+ ; CHECK-NEXT: store i64 [[TMP31]], ptr [[DST_2]], align 8, !alias.scope [[META16]], !noalias [[META17]]
251+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE51]]
252+ ; CHECK: [[PRED_STORE_CONTINUE51]]:
253+ ; CHECK-NEXT: [[TMP19:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer
254+ ; CHECK-NEXT: [[TMP37:%.*]] = or <2 x i1> [[TMP23]], [[TMP19]]
255+ ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP37]], i32 0
256+ ; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF52:.*]], label %[[PRED_STORE_CONTINUE53:.*]]
257+ ; CHECK: [[PRED_STORE_IF52]]:
258+ ; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META18:![0-9]+]]
259+ ; CHECK-NEXT: store i64 [[TMP22]], ptr [[DST]], align 8, !alias.scope [[META19:![0-9]+]], !noalias [[META18]]
241260; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE53]]
242261; CHECK: [[PRED_STORE_CONTINUE53]]:
243262; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i1> [[TMP37]], i32 1
244263; CHECK-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF54:.*]], label %[[PRED_STORE_CONTINUE55]]
245264; CHECK: [[PRED_STORE_IF54]]:
265+ ; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META18]]
266+ ; CHECK-NEXT: store i64 [[TMP24]], ptr [[DST]], align 8, !alias.scope [[META19]], !noalias [[META18]]
267+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE55]]
268+ ; CHECK: [[PRED_STORE_CONTINUE55]]:
269+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
270+ ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
271+ ; CHECK-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
246272; CHECK: [[MIDDLE_BLOCK]]:
247273; CHECK-NEXT: br label %[[SCALAR_PH]]
248274; CHECK: [[SCALAR_PH]]:
@@ -283,6 +309,7 @@ define void @test_exit_branch_cost(ptr %dst, ptr noalias %x.ptr, ptr noalias %y.
283309; CHECK: [[LOOP_LATCH]]:
284310; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
285311; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 64
312+ ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP21:![0-9]+]]
286313; CHECK: [[EXIT]]:
287314; CHECK-NEXT: ret void
288315;
@@ -358,12 +385,21 @@ define void @invalid_legacy_cost(i64 %N, ptr %x) #0 {
358385; CHECK-NEXT: store <2 x ptr> [[TMP8]], ptr [[TMP9]], align 8
359386; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
360387; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
388+ ; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
389+ ; CHECK: [[MIDDLE_BLOCK]]:
390+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
391+ ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
392+ ; CHECK: [[SCALAR_PH]]:
393+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
394+ ; CHECK-NEXT: br label %[[FOR_BODY:.*]]
395+ ; CHECK: [[FOR_BODY]]:
361396; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
362397; CHECK-NEXT: [[TMP12:%.*]] = alloca i8, i64 0, align 16
363398; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr ptr, ptr [[X]], i64 [[IV]]
364399; CHECK-NEXT: store ptr [[TMP12]], ptr [[ARRAYIDX]], align 8
365400; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
366401; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], [[N]]
402+ ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
367403; CHECK: [[FOR_END]]:
368404; CHECK-NEXT: ret void
369405;
@@ -392,4 +428,24 @@ declare i64 @llvm.umin.i64(i64, i64)
392428; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
393429; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
394430; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
395- ; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
431+ ; CHECK: [[PROF4]] = !{!"branch_weights", i32 4, i32 12}
432+ ; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
433+ ; CHECK: [[META6]] = !{[[META7:![0-9]+]]}
434+ ; CHECK: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]}
435+ ; CHECK: [[META8]] = distinct !{[[META8]], !"LVerDomain"}
436+ ; CHECK: [[META9]] = !{[[META10:![0-9]+]], [[META11:![0-9]+]], [[META12:![0-9]+]], [[META13:![0-9]+]]}
437+ ; CHECK: [[META10]] = distinct !{[[META10]], [[META8]]}
438+ ; CHECK: [[META11]] = distinct !{[[META11]], [[META8]]}
439+ ; CHECK: [[META12]] = distinct !{[[META12]], [[META8]]}
440+ ; CHECK: [[META13]] = distinct !{[[META13]], [[META8]]}
441+ ; CHECK: [[META14]] = !{[[META10]]}
442+ ; CHECK: [[META15]] = !{[[META11]], [[META12]], [[META13]]}
443+ ; CHECK: [[META16]] = !{[[META11]]}
444+ ; CHECK: [[META17]] = !{[[META12]], [[META13]]}
445+ ; CHECK: [[META18]] = !{[[META13]]}
446+ ; CHECK: [[META19]] = !{[[META12]]}
447+ ; CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META1]], [[META2]]}
448+ ; CHECK: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]]}
449+ ; CHECK: [[LOOP22]] = distinct !{[[LOOP22]], [[META1]], [[META2]]}
450+ ; CHECK: [[LOOP23]] = distinct !{[[LOOP23]], [[META2]], [[META1]]}
451+ ;.
0 commit comments