 ; REQUIRES: asserts
 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-minimum-VF=0 --debug-only=loop-vectorize -force-target-instruction-cost=1 -S 2>%t | FileCheck %s --check-prefix=CHECK
 ; RUN: cat %t | FileCheck %s --check-prefix=DEBUG
-; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-minimum-VF=8 --debug-only=loop-vectorize -S 2>%t | FileCheck %s --check-prefix=CHECK
-; RUN: cat %t | FileCheck %s --check-prefix=DEBUG
 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-force-VF=8 --debug-only=loop-vectorize -S 2>%t | FileCheck %s --check-prefix=CHECK-VF8
 ; RUN: cat %t | FileCheck %s --check-prefix=DEBUG-FORCED
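 ;
 ; Note: the first RUN line leaves the epilogue VF to the cost model
 ; (-epilogue-vectorization-minimum-VF=0 should permit epilogue vectorization
 ; for any main-loop VF), while the CHECK-VF8 RUN line forces a fixed-width
 ; epilogue VF of 8 with -epilogue-vectorization-force-VF=8, as confirmed by
 ; the "Epilogue vectorization factor is forced." debug message below.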
 
 target triple = "aarch64-linux-gnu"
 
-; DEBUG: LV: Checking a loop in "f1"
+; DEBUG: LV: Checking a loop in "main_vf_vscale_x_16"
 ; DEBUG: Create Skeleton for epilogue vectorized loop (first pass)
 ; DEBUG: Main Loop VF:vscale x 16, Main Loop UF:2, Epilogue Loop VF:vscale x 8, Epilogue Loop UF:1
 
-; DEBUG-FORCED: LV: Checking a loop in "f1"
+; DEBUG-FORCED: LV: Checking a loop in "main_vf_vscale_x_16"
 ; DEBUG-FORCED: LEV: Epilogue vectorization factor is forced.
 ; DEBUG-FORCED: Create Skeleton for epilogue vectorized loop (first pass)
 ; DEBUG-FORCED: Main Loop VF:vscale x 16, Main Loop UF:2, Epilogue Loop VF:8, Epilogue Loop UF:1
 
-define void @f1(i8* %A) #0 {
-; CHECK-LABEL: @f1(
+define void @main_vf_vscale_x_16(i8* %A) #0 {
+; CHECK-LABEL: @main_vf_vscale_x_16(
 ; CHECK-NEXT:  iter.check:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 8
@@ -105,7 +103,7 @@ define void @f1(i8* %A) #0 {
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
-; CHECK-VF8-LABEL: @f1(
+; CHECK-VF8-LABEL: @main_vf_vscale_x_16(
 ; CHECK-VF8-NEXT:  iter.check:
 ; CHECK-VF8-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; CHECK-VF8:       vector.main.loop.iter.check:
@@ -195,4 +193,185 @@ exit:
   ret void
 }
 
+
+; DEBUG: LV: Checking a loop in "main_vf_vscale_x_2"
+; DEBUG: Create Skeleton for epilogue vectorized loop (first pass)
+; DEBUG: Main Loop VF:vscale x 2, Main Loop UF:2, Epilogue Loop VF:8, Epilogue Loop UF:1
+
+; DEBUG-FORCED: LV: Checking a loop in "main_vf_vscale_x_2"
+; DEBUG-FORCED: LEV: Epilogue vectorization factor is forced.
+; DEBUG-FORCED: Create Skeleton for epilogue vectorized loop (first pass)
+; DEBUG-FORCED: Main Loop VF:vscale x 2, Main Loop UF:2, Epilogue Loop VF:8, Epilogue Loop UF:1
+
+; When the vector.body uses VF=vscale x 1 (or VF=vscale x 2, because that is
+; the minimum VF supported by SVE), we can still use a wide fixed-width VF=8
+; for the epilogue if the vectors are known to be sufficiently wide. This
+; information can be deduced from the vscale_range attribute or from
+; VScaleForTuning (set by -mcpu/-mtune).
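+;
+; As a worked example of that deduction: the vscale_range(8, 8) attribute on
+; the function below pins vscale to exactly 8, so the main loop's
+; VF=vscale x 2 operates on 8 * 2 = 16 i64 elements per vector. A fixed-width
+; epilogue VF=8 is therefore known to be narrower than the main vector body,
+; making <8 x i64> a safe epilogue choice.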
+define void @main_vf_vscale_x_2(i64* %A) #0 vscale_range(8, 8) {
+; CHECK-LABEL: @main_vf_vscale_x_2(
+; CHECK-NEXT:  iter.check:
+; CHECK-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK:       vector.main.loop.iter.check:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 2
+; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 1
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, i64* [[TMP10]], i32 0
+; CHECK-NEXT:    [[TMP13:%.*]] = bitcast i64* [[TMP12]] to <vscale x 2 x i64>*
+; CHECK-NEXT:    store <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64>* [[TMP13]], align 1
+; CHECK-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP15:%.*]] = mul i32 [[TMP14]], 2
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i64, i64* [[TMP10]], i32 [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i64* [[TMP16]] to <vscale x 2 x i64>*
+; CHECK-NEXT:    store <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64>* [[TMP17]], align 1
+; CHECK-NEXT:    [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP19:%.*]] = mul i64 [[TMP18]], 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]]
+; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
+; CHECK:       vec.epilog.iter.check:
+; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 1024, [[N_VEC]]
+; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
+; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK:       vec.epilog.ph:
+; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
+; CHECK:       vec.epilog.vector.body:
+; CHECK-NEXT:    [[INDEX2:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP21:%.*]] = add i64 [[INDEX2]], 0
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i64, i64* [[TMP22]], i32 0
+; CHECK-NEXT:    [[TMP24:%.*]] = bitcast i64* [[TMP23]] to <8 x i64>*
+; CHECK-NEXT:    store <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, <8 x i64>* [[TMP24]], align 1
+; CHECK-NEXT:    [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 8
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024
+; CHECK-NEXT:    br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK:       vec.epilog.middle.block:
+; CHECK-NEXT:    [[CMP_N1:%.*]] = icmp eq i64 1024, 1024
+; CHECK-NEXT:    br i1 [[CMP_N1]], label [[EXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK:       vec.epilog.scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[IV]]
+; CHECK-NEXT:    store i64 1, i64* [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK:       exit.loopexit:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+; CHECK-VF8-LABEL: @main_vf_vscale_x_2(
+; CHECK-VF8-NEXT:  iter.check:
+; CHECK-VF8-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK-VF8:       vector.main.loop.iter.check:
+; CHECK-VF8-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-VF8-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; CHECK-VF8-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; CHECK-VF8-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-VF8:       vector.ph:
+; CHECK-VF8-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-VF8-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; CHECK-VF8-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; CHECK-VF8-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; CHECK-VF8-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF8:       vector.body:
+; CHECK-VF8-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF8-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF8-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-VF8-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 2
+; CHECK-VF8-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 0
+; CHECK-VF8-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 1
+; CHECK-VF8-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]]
+; CHECK-VF8-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[TMP4]]
+; CHECK-VF8-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP9]]
+; CHECK-VF8-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, i64* [[TMP10]], i32 0
+; CHECK-VF8-NEXT:    [[TMP13:%.*]] = bitcast i64* [[TMP12]] to <vscale x 2 x i64>*
+; CHECK-VF8-NEXT:    store <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64>* [[TMP13]], align 1
+; CHECK-VF8-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-VF8-NEXT:    [[TMP15:%.*]] = mul i32 [[TMP14]], 2
+; CHECK-VF8-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i64, i64* [[TMP10]], i32 [[TMP15]]
+; CHECK-VF8-NEXT:    [[TMP17:%.*]] = bitcast i64* [[TMP16]] to <vscale x 2 x i64>*
+; CHECK-VF8-NEXT:    store <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64>* [[TMP17]], align 1
+; CHECK-VF8-NEXT:    [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-VF8-NEXT:    [[TMP19:%.*]] = mul i64 [[TMP18]], 4
+; CHECK-VF8-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]]
+; CHECK-VF8-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF8-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-VF8:       middle.block:
+; CHECK-VF8-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; CHECK-VF8-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
+; CHECK-VF8:       vec.epilog.iter.check:
+; CHECK-VF8-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 1024, [[N_VEC]]
+; CHECK-VF8-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
+; CHECK-VF8-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-VF8:       vec.epilog.ph:
+; CHECK-VF8-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-VF8-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
+; CHECK-VF8:       vec.epilog.vector.body:
+; CHECK-VF8-NEXT:    [[INDEX2:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-VF8-NEXT:    [[TMP21:%.*]] = add i64 [[INDEX2]], 0
+; CHECK-VF8-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP21]]
+; CHECK-VF8-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i64, i64* [[TMP22]], i32 0
+; CHECK-VF8-NEXT:    [[TMP24:%.*]] = bitcast i64* [[TMP23]] to <8 x i64>*
+; CHECK-VF8-NEXT:    store <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, <8 x i64>* [[TMP24]], align 1
+; CHECK-VF8-NEXT:    [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 8
+; CHECK-VF8-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024
+; CHECK-VF8-NEXT:    br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-VF8:       vec.epilog.middle.block:
+; CHECK-VF8-NEXT:    [[CMP_N1:%.*]] = icmp eq i64 1024, 1024
+; CHECK-VF8-NEXT:    br i1 [[CMP_N1]], label [[EXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK-VF8:       vec.epilog.scalar.ph:
+; CHECK-VF8-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; CHECK-VF8-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF8:       for.body:
+; CHECK-VF8-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF8-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[IV]]
+; CHECK-VF8-NEXT:    store i64 1, i64* [[ARRAYIDX]], align 1
+; CHECK-VF8-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-VF8-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024
+; CHECK-VF8-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-VF8:       exit.loopexit:
+; CHECK-VF8-NEXT:    br label [[EXIT]]
+; CHECK-VF8:       exit:
+; CHECK-VF8-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i64, i64* %A, i64 %iv
+  store i64 1, i64* %arrayidx, align 1
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp ne i64 %iv.next, 1024
+  br i1 %exitcond, label %for.body, label %exit
+
+exit:
+  ret void
+}
+
 attributes #0 = { "target-features"="+sve" }