@@ -24,20 +24,21 @@ target triple = "aarch64-linux-gnu"
2424; DEBUG-EPILOG-PREFER-SCALABLE: Create Skeleton for epilogue vectorized loop (first pass)
2525; DEBUG-EPILOG-PREFER-SCALABLE: Main Loop VF:vscale x 16, Main Loop UF:2, Epilogue Loop VF:vscale x 8, Epilogue Loop UF:1
2626
27- define void @main_vf_vscale_x_16 (ptr %A ) #0 {
27+ define void @main_vf_vscale_x_16 (ptr %A , i64 %n ) #0 {
2828; CHECK-LABEL: @main_vf_vscale_x_16(
2929; CHECK-NEXT: iter.check:
30- ; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
30+ ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N:%.*]], 8
31+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
3132; CHECK: vector.main.loop.iter.check:
3233; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
3334; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
34- ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024 , [[TMP1]]
35+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]] , [[TMP1]]
3536; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
3637; CHECK: vector.ph:
3738; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
3839; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
39- ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024 , [[TMP3]]
40- ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024 , [[N_MOD_VF]]
40+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]] , [[TMP3]]
41+ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]] , [[N_MOD_VF]]
4142; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
4243; CHECK: vector.body:
4344; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -51,45 +52,48 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
5152; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
5253; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
5354; CHECK: middle.block:
54- ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024 , [[N_VEC]]
55+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]] , [[N_VEC]]
5556; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
5657; CHECK: vec.epilog.iter.check:
57- ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 1024 , [[N_VEC]]
58+ ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]] , [[N_VEC]]
5859; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
59- ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
60+ ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
6061; CHECK: vec.epilog.ph:
6162; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
63+ ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 8
64+ ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]]
6265; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
6366; CHECK: vec.epilog.vector.body:
6467; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
6568; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX1]]
6669; CHECK-NEXT: store <8 x i8> splat (i8 1), ptr [[TMP9]], align 1
6770; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8
68- ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024
69- ; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3 :![0-9]+]]
71+ ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC3]]
72+ ; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4 :![0-9]+]]
7073; CHECK: vec.epilog.middle.block:
71- ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
74+ ; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC3]]
75+ ; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
7276; CHECK: vec.epilog.scalar.ph:
73- ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024 , [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
77+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]] , [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
7478; CHECK-NEXT: br label [[FOR_BODY:%.*]]
7579; CHECK: for.body:
7680;
7781; CHECK-EPILOG-PREFER-SCALABLE-LABEL: @main_vf_vscale_x_16(
7882; CHECK-EPILOG-PREFER-SCALABLE-NEXT: iter.check:
7983; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
8084; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
81- ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024 , [[TMP1]]
85+ ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]] , [[TMP1]]
8286; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
8387; CHECK-EPILOG-PREFER-SCALABLE: vector.main.loop.iter.check:
8488; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
8589; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 5
86- ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 1024 , [[TMP3]]
90+ ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]] , [[TMP3]]
8791; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
8892; CHECK-EPILOG-PREFER-SCALABLE: vector.ph:
8993; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
9094; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 32
91- ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024 , [[TMP5]]
92- ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024 , [[N_MOD_VF]]
95+ ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]] , [[TMP5]]
96+ ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]] , [[N_MOD_VF]]
9397; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
9498; CHECK-EPILOG-PREFER-SCALABLE: vector.body:
9599; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -103,30 +107,30 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
103107; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
104108; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
105109; CHECK-EPILOG-PREFER-SCALABLE: middle.block:
106- ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024 , [[N_VEC]]
110+ ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]] , [[N_VEC]]
107111; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
108112; CHECK-EPILOG-PREFER-SCALABLE: vec.epilog.iter.check:
109- ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 1024 , [[N_VEC]]
113+ ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]] , [[N_VEC]]
110114; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
111115; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 3
112116; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP12]]
113- ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
117+ ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
114118; CHECK-EPILOG-PREFER-SCALABLE: vec.epilog.ph:
115119; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
116120; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
117121; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 8
118- ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_MOD_VF2:%.*]] = urem i64 1024 , [[TMP14]]
119- ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC3:%.*]] = sub i64 1024 , [[N_MOD_VF2]]
122+ ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]] , [[TMP14]]
123+ ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]] , [[N_MOD_VF2]]
120124; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
121125; CHECK-EPILOG-PREFER-SCALABLE: vec.epilog.vector.body:
122126; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
123127; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX4]]
124128; CHECK-EPILOG-PREFER-SCALABLE-NEXT: store <vscale x 8 x i8> splat (i8 1), ptr [[TMP15]], align 1
125129; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX4]], [[TMP14]]
126130; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC3]]
127- ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[TMP16]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3 :![0-9]+]]
131+ ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[TMP16]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4 :![0-9]+]]
128132; CHECK-EPILOG-PREFER-SCALABLE: vec.epilog.middle.block:
129- ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[CMP_N6:%.*]] = icmp eq i64 1024 , [[N_VEC3]]
133+ ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]] , [[N_VEC3]]
130134; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
131135; CHECK-EPILOG-PREFER-SCALABLE: vec.epilog.scalar.ph:
132136; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
@@ -141,7 +145,7 @@ for.body:
141145 %arrayidx = getelementptr inbounds i8 , ptr %A , i64 %iv
142146 store i8 1 , ptr %arrayidx , align 1
143147 %iv.next = add nuw nsw i64 %iv , 1
144- %exitcond = icmp ne i64 %iv.next , 1024
148+ %exitcond = icmp ne i64 %iv.next , %n
145149 br i1 %exitcond , label %for.body , label %exit
146150
147151exit:
0 commit comments