@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -p loop-unroll -mtriple riscv64 -mattr=+v,+f -S %s | FileCheck %s --check-prefixes=COMMON,CHECK
-; RUN: opt -p loop-unroll -mtriple=riscv64 -mcpu=sifive-s76 -S %s | FileCheck %s --check-prefixes=COMMON,SIFIVE
+; RUN: opt -p loop-unroll -mtriple=riscv64 -mcpu=sifive-p870 -S %s | FileCheck %s --check-prefixes=COMMON,SIFIVE
 
 define void @reverse(ptr %dst, ptr %src, i64 %len) {
 ; CHECK-LABEL: define void @reverse(
@@ -248,7 +248,7 @@ define void @saxpy_tripcount1K_av0(ptr %dst, ptr %src, float %a) { |
 ; SIFIVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 ; SIFIVE-NEXT: br label %[[VECTOR_BODY:.*]]
 ; SIFIVE: [[VECTOR_BODY]]:
-; SIFIVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; SIFIVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT_15:%.*]], %[[VECTOR_BODY]] ]
 ; SIFIVE-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX]]
 ; SIFIVE-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
 ; SIFIVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX]]
@@ -276,9 +276,93 @@ define void @saxpy_tripcount1K_av0(ptr %dst, ptr %src, float %a) { |
 ; SIFIVE-NEXT: [[WIDE_LOAD12_3:%.*]] = load <4 x float>, ptr [[TMP10]], align 4
 ; SIFIVE-NEXT: [[TMP11:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_3]], <4 x float> [[WIDE_LOAD12_3]])
 ; SIFIVE-NEXT: store <4 x float> [[TMP11]], ptr [[TMP10]], align 4
-; SIFIVE-NEXT: [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 16
-; SIFIVE-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; SIFIVE-NEXT: br i1 [[TMP3]], label %[[EXIT:.*]], label %[[VECTOR_BODY]]
+; SIFIVE-NEXT: [[INDEX_NEXT:%.*]] = add nuw nsw i64 [[INDEX]], 16
+; SIFIVE-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT]]
+; SIFIVE-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x float>, ptr [[TMP49]], align 4
+; SIFIVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_4:%.*]] = load <4 x float>, ptr [[TMP13]], align 4
+; SIFIVE-NEXT: [[TMP14:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_4]], <4 x float> [[WIDE_LOAD12_4]])
+; SIFIVE-NEXT: store <4 x float> [[TMP14]], ptr [[TMP13]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_4:%.*]] = add nuw nsw i64 [[INDEX]], 20
+; SIFIVE-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_4]]
+; SIFIVE-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x float>, ptr [[TMP15]], align 4
+; SIFIVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_4]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_5:%.*]] = load <4 x float>, ptr [[TMP16]], align 4
+; SIFIVE-NEXT: [[TMP17:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_5]], <4 x float> [[WIDE_LOAD12_5]])
+; SIFIVE-NEXT: store <4 x float> [[TMP17]], ptr [[TMP16]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_5:%.*]] = add nuw nsw i64 [[INDEX]], 24
+; SIFIVE-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_5]]
+; SIFIVE-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x float>, ptr [[TMP18]], align 4
+; SIFIVE-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_5]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_6:%.*]] = load <4 x float>, ptr [[TMP19]], align 4
+; SIFIVE-NEXT: [[TMP20:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_6]], <4 x float> [[WIDE_LOAD12_6]])
+; SIFIVE-NEXT: store <4 x float> [[TMP20]], ptr [[TMP19]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_6:%.*]] = add nuw nsw i64 [[INDEX]], 28
+; SIFIVE-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_6]]
+; SIFIVE-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x float>, ptr [[TMP21]], align 4
+; SIFIVE-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_6]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_7:%.*]] = load <4 x float>, ptr [[TMP22]], align 4
+; SIFIVE-NEXT: [[TMP23:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_7]], <4 x float> [[WIDE_LOAD12_7]])
+; SIFIVE-NEXT: store <4 x float> [[TMP23]], ptr [[TMP22]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_7:%.*]] = add nuw nsw i64 [[INDEX]], 32
+; SIFIVE-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_7]]
+; SIFIVE-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x float>, ptr [[TMP24]], align 4
+; SIFIVE-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_7]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_8:%.*]] = load <4 x float>, ptr [[TMP25]], align 4
+; SIFIVE-NEXT: [[TMP26:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_8]], <4 x float> [[WIDE_LOAD12_8]])
+; SIFIVE-NEXT: store <4 x float> [[TMP26]], ptr [[TMP25]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_8:%.*]] = add nuw nsw i64 [[INDEX]], 36
+; SIFIVE-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_8]]
+; SIFIVE-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x float>, ptr [[TMP27]], align 4
+; SIFIVE-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_8]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_9:%.*]] = load <4 x float>, ptr [[TMP28]], align 4
+; SIFIVE-NEXT: [[TMP29:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_9]], <4 x float> [[WIDE_LOAD12_9]])
+; SIFIVE-NEXT: store <4 x float> [[TMP29]], ptr [[TMP28]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_9:%.*]] = add nuw nsw i64 [[INDEX]], 40
+; SIFIVE-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_9]]
+; SIFIVE-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x float>, ptr [[TMP30]], align 4
+; SIFIVE-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_9]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_10:%.*]] = load <4 x float>, ptr [[TMP31]], align 4
+; SIFIVE-NEXT: [[TMP32:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_10]], <4 x float> [[WIDE_LOAD12_10]])
+; SIFIVE-NEXT: store <4 x float> [[TMP32]], ptr [[TMP31]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_10:%.*]] = add nuw nsw i64 [[INDEX]], 44
+; SIFIVE-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_10]]
+; SIFIVE-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x float>, ptr [[TMP33]], align 4
+; SIFIVE-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_10]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_11:%.*]] = load <4 x float>, ptr [[TMP34]], align 4
+; SIFIVE-NEXT: [[TMP35:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_11]], <4 x float> [[WIDE_LOAD12_11]])
+; SIFIVE-NEXT: store <4 x float> [[TMP35]], ptr [[TMP34]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_11:%.*]] = add nuw nsw i64 [[INDEX]], 48
+; SIFIVE-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_11]]
+; SIFIVE-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x float>, ptr [[TMP36]], align 4
+; SIFIVE-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_11]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_12:%.*]] = load <4 x float>, ptr [[TMP37]], align 4
+; SIFIVE-NEXT: [[TMP38:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_12]], <4 x float> [[WIDE_LOAD12_12]])
+; SIFIVE-NEXT: store <4 x float> [[TMP38]], ptr [[TMP37]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_12:%.*]] = add nuw nsw i64 [[INDEX]], 52
+; SIFIVE-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_12]]
+; SIFIVE-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x float>, ptr [[TMP39]], align 4
+; SIFIVE-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_12]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_13:%.*]] = load <4 x float>, ptr [[TMP40]], align 4
+; SIFIVE-NEXT: [[TMP41:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_13]], <4 x float> [[WIDE_LOAD12_13]])
+; SIFIVE-NEXT: store <4 x float> [[TMP41]], ptr [[TMP40]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_13:%.*]] = add nuw nsw i64 [[INDEX]], 56
+; SIFIVE-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_13]]
+; SIFIVE-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x float>, ptr [[TMP42]], align 4
+; SIFIVE-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_13]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_14:%.*]] = load <4 x float>, ptr [[TMP43]], align 4
+; SIFIVE-NEXT: [[TMP44:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_14]], <4 x float> [[WIDE_LOAD12_14]])
+; SIFIVE-NEXT: store <4 x float> [[TMP44]], ptr [[TMP43]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_14:%.*]] = add nuw nsw i64 [[INDEX]], 60
+; SIFIVE-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_14]]
+; SIFIVE-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x float>, ptr [[TMP45]], align 4
+; SIFIVE-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_14]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_15:%.*]] = load <4 x float>, ptr [[TMP46]], align 4
+; SIFIVE-NEXT: [[TMP47:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_15]], <4 x float> [[WIDE_LOAD12_15]])
+; SIFIVE-NEXT: store <4 x float> [[TMP47]], ptr [[TMP46]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_15]] = add nuw nsw i64 [[INDEX]], 64
+; SIFIVE-NEXT: [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT_15]], 1024
+; SIFIVE-NEXT: br i1 [[TMP48]], label %[[EXIT:.*]], label %[[VECTOR_BODY]]
 ; SIFIVE: [[EXIT]]:
 ; SIFIVE-NEXT: ret void
 ;
@@ -345,8 +429,6 @@ vector.body: ; preds = %vector.body, %entry |
 exit: ; preds = %vector.body
 ret void
 }
-!0 = !{!0, !1}
-!1 = !{!"llvm.loop.isvectorized", i32 1}
 
 ; On SiFive we should runtime unroll the scalar epilogue loop, but not the
 ; vector loop.
@@ -587,6 +669,127 @@ exit: |
 ret void
 }
 
+define void @vector_operands(ptr %p, i64 %n) {
+; CHECK-LABEL: define void @vector_operands(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[ENTRY]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i64, ptr [[P]], i64 [[EVL_BASED_IV]]
+; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ADDR]], <vscale x 2 x i1> splat (i1 true), i32 [[VL]])
+; CHECK-NEXT: [[VL_ZEXT:%.*]] = zext i32 [[VL]] to i64
+; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[VL_ZEXT]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[VL_ZEXT]]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
+; CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP2]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+; SIFIVE-LABEL: define void @vector_operands(
+; SIFIVE-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SIFIVE-NEXT: [[ENTRY:.*]]:
+; SIFIVE-NEXT: br label %[[VECTOR_BODY:.*]]
+; SIFIVE: [[VECTOR_BODY]]:
+; SIFIVE-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_EVL_NEXT_7:%.*]], %[[VECTOR_BODY_7:.*]] ]
+; SIFIVE-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[ENTRY]] ], [ [[AVL_NEXT_7:%.*]], %[[VECTOR_BODY_7]] ]
+; SIFIVE-NEXT: [[VL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; SIFIVE-NEXT: [[ADDR:%.*]] = getelementptr i64, ptr [[P]], i64 [[EVL_BASED_IV]]
+; SIFIVE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ADDR]], <vscale x 2 x i1> splat (i1 true), i32 [[VL]])
+; SIFIVE-NEXT: [[VL_ZEXT:%.*]] = zext i32 [[VL]] to i64
+; SIFIVE-NEXT: [[INDEX_EVL_NEXT:%.*]] = add nuw i64 [[VL_ZEXT]], [[EVL_BASED_IV]]
+; SIFIVE-NEXT: [[AVL_NEXT:%.*]] = sub nuw i64 [[AVL]], [[VL_ZEXT]]
+; SIFIVE-NEXT: [[TMP0:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
+; SIFIVE-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[VECTOR_BODY_1:.*]], !llvm.loop [[LOOP2]]
+; SIFIVE: [[VECTOR_BODY_1]]:
+; SIFIVE-NEXT: [[VL_1:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL_NEXT]], i32 2, i1 true)
+; SIFIVE-NEXT: [[ADDR_1:%.*]] = getelementptr i64, ptr [[P]], i64 [[INDEX_EVL_NEXT]]
+; SIFIVE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ADDR_1]], <vscale x 2 x i1> splat (i1 true), i32 [[VL_1]])
+; SIFIVE-NEXT: [[VL_ZEXT_1:%.*]] = zext i32 [[VL_1]] to i64
+; SIFIVE-NEXT: [[INDEX_EVL_NEXT_1:%.*]] = add nuw i64 [[VL_ZEXT_1]], [[INDEX_EVL_NEXT]]
+; SIFIVE-NEXT: [[AVL_NEXT_1:%.*]] = sub nuw i64 [[AVL_NEXT]], [[VL_ZEXT_1]]
+; SIFIVE-NEXT: [[TMP1:%.*]] = icmp eq i64 [[AVL_NEXT_1]], 0
+; SIFIVE-NEXT: br i1 [[TMP1]], label %[[EXIT]], label %[[VECTOR_BODY_2:.*]], !llvm.loop [[LOOP2]]
+; SIFIVE: [[VECTOR_BODY_2]]:
+; SIFIVE-NEXT: [[VL_2:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL_NEXT_1]], i32 2, i1 true)
+; SIFIVE-NEXT: [[ADDR_2:%.*]] = getelementptr i64, ptr [[P]], i64 [[INDEX_EVL_NEXT_1]]
+; SIFIVE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ADDR_2]], <vscale x 2 x i1> splat (i1 true), i32 [[VL_2]])
+; SIFIVE-NEXT: [[VL_ZEXT_2:%.*]] = zext i32 [[VL_2]] to i64
+; SIFIVE-NEXT: [[INDEX_EVL_NEXT_2:%.*]] = add nuw i64 [[VL_ZEXT_2]], [[INDEX_EVL_NEXT_1]]
+; SIFIVE-NEXT: [[AVL_NEXT_2:%.*]] = sub nuw i64 [[AVL_NEXT_1]], [[VL_ZEXT_2]]
+; SIFIVE-NEXT: [[TMP2:%.*]] = icmp eq i64 [[AVL_NEXT_2]], 0
+; SIFIVE-NEXT: br i1 [[TMP2]], label %[[EXIT]], label %[[VECTOR_BODY_3:.*]], !llvm.loop [[LOOP2]]
+; SIFIVE: [[VECTOR_BODY_3]]:
+; SIFIVE-NEXT: [[VL_3:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL_NEXT_2]], i32 2, i1 true)
+; SIFIVE-NEXT: [[ADDR_3:%.*]] = getelementptr i64, ptr [[P]], i64 [[INDEX_EVL_NEXT_2]]
+; SIFIVE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ADDR_3]], <vscale x 2 x i1> splat (i1 true), i32 [[VL_3]])
+; SIFIVE-NEXT: [[VL_ZEXT_3:%.*]] = zext i32 [[VL_3]] to i64
+; SIFIVE-NEXT: [[INDEX_EVL_NEXT_3:%.*]] = add nuw i64 [[VL_ZEXT_3]], [[INDEX_EVL_NEXT_2]]
+; SIFIVE-NEXT: [[AVL_NEXT_3:%.*]] = sub nuw i64 [[AVL_NEXT_2]], [[VL_ZEXT_3]]
+; SIFIVE-NEXT: [[TMP3:%.*]] = icmp eq i64 [[AVL_NEXT_3]], 0
+; SIFIVE-NEXT: br i1 [[TMP3]], label %[[EXIT]], label %[[VECTOR_BODY_4:.*]], !llvm.loop [[LOOP2]]
+; SIFIVE: [[VECTOR_BODY_4]]:
+; SIFIVE-NEXT: [[VL_4:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL_NEXT_3]], i32 2, i1 true)
+; SIFIVE-NEXT: [[ADDR_4:%.*]] = getelementptr i64, ptr [[P]], i64 [[INDEX_EVL_NEXT_3]]
+; SIFIVE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ADDR_4]], <vscale x 2 x i1> splat (i1 true), i32 [[VL_4]])
+; SIFIVE-NEXT: [[VL_ZEXT_4:%.*]] = zext i32 [[VL_4]] to i64
+; SIFIVE-NEXT: [[INDEX_EVL_NEXT_4:%.*]] = add nuw i64 [[VL_ZEXT_4]], [[INDEX_EVL_NEXT_3]]
+; SIFIVE-NEXT: [[AVL_NEXT_4:%.*]] = sub nuw i64 [[AVL_NEXT_3]], [[VL_ZEXT_4]]
+; SIFIVE-NEXT: [[TMP4:%.*]] = icmp eq i64 [[AVL_NEXT_4]], 0
+; SIFIVE-NEXT: br i1 [[TMP4]], label %[[EXIT]], label %[[VECTOR_BODY_5:.*]], !llvm.loop [[LOOP2]]
+; SIFIVE: [[VECTOR_BODY_5]]:
+; SIFIVE-NEXT: [[VL_5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL_NEXT_4]], i32 2, i1 true)
+; SIFIVE-NEXT: [[ADDR_5:%.*]] = getelementptr i64, ptr [[P]], i64 [[INDEX_EVL_NEXT_4]]
+; SIFIVE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ADDR_5]], <vscale x 2 x i1> splat (i1 true), i32 [[VL_5]])
+; SIFIVE-NEXT: [[VL_ZEXT_5:%.*]] = zext i32 [[VL_5]] to i64
+; SIFIVE-NEXT: [[INDEX_EVL_NEXT_5:%.*]] = add nuw i64 [[VL_ZEXT_5]], [[INDEX_EVL_NEXT_4]]
+; SIFIVE-NEXT: [[AVL_NEXT_5:%.*]] = sub nuw i64 [[AVL_NEXT_4]], [[VL_ZEXT_5]]
+; SIFIVE-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT_5]], 0
+; SIFIVE-NEXT: br i1 [[TMP5]], label %[[EXIT]], label %[[VECTOR_BODY_6:.*]], !llvm.loop [[LOOP2]]
+; SIFIVE: [[VECTOR_BODY_6]]:
+; SIFIVE-NEXT: [[VL_6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL_NEXT_5]], i32 2, i1 true)
+; SIFIVE-NEXT: [[ADDR_6:%.*]] = getelementptr i64, ptr [[P]], i64 [[INDEX_EVL_NEXT_5]]
+; SIFIVE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ADDR_6]], <vscale x 2 x i1> splat (i1 true), i32 [[VL_6]])
+; SIFIVE-NEXT: [[VL_ZEXT_6:%.*]] = zext i32 [[VL_6]] to i64
+; SIFIVE-NEXT: [[INDEX_EVL_NEXT_6:%.*]] = add nuw i64 [[VL_ZEXT_6]], [[INDEX_EVL_NEXT_5]]
+; SIFIVE-NEXT: [[AVL_NEXT_6:%.*]] = sub nuw i64 [[AVL_NEXT_5]], [[VL_ZEXT_6]]
+; SIFIVE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT_6]], 0
+; SIFIVE-NEXT: br i1 [[TMP6]], label %[[EXIT]], label %[[VECTOR_BODY_7]], !llvm.loop [[LOOP2]]
+; SIFIVE: [[VECTOR_BODY_7]]:
+; SIFIVE-NEXT: [[VL_7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL_NEXT_6]], i32 2, i1 true)
+; SIFIVE-NEXT: [[ADDR_7:%.*]] = getelementptr i64, ptr [[P]], i64 [[INDEX_EVL_NEXT_6]]
+; SIFIVE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ADDR_7]], <vscale x 2 x i1> splat (i1 true), i32 [[VL_7]])
+; SIFIVE-NEXT: [[VL_ZEXT_7:%.*]] = zext i32 [[VL_7]] to i64
+; SIFIVE-NEXT: [[INDEX_EVL_NEXT_7]] = add nuw i64 [[VL_ZEXT_7]], [[INDEX_EVL_NEXT_6]]
+; SIFIVE-NEXT: [[AVL_NEXT_7]] = sub nuw i64 [[AVL_NEXT_6]], [[VL_ZEXT_7]]
+; SIFIVE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT_7]], 0
+; SIFIVE-NEXT: br i1 [[TMP7]], label %[[EXIT]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP2]]
+; SIFIVE: [[EXIT]]:
+; SIFIVE-NEXT: ret void
+;
+entry:
+ br label %vector.body
+
+vector.body:
+ %evl.based.iv = phi i64 [ 0, %entry ], [ %index.evl.next, %vector.body ]
+ %avl = phi i64 [ %n, %entry ], [ %avl.next, %vector.body ]
+ %vl = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 2, i1 true)
+ %addr = getelementptr i64, ptr %p, i64 %evl.based.iv
+ call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> splat (i64 0), ptr align 8 %addr, <vscale x 2 x i1> splat (i1 true), i32 %vl)
+ %vl.zext = zext i32 %vl to i64
+ %index.evl.next = add nuw i64 %vl.zext, %evl.based.iv
+ %avl.next = sub nuw i64 %avl, %vl.zext
+ %0 = icmp eq i64 %avl.next, 0
+ br i1 %0, label %exit, label %vector.body, !llvm.loop !2
+
+exit:
+ ret void
+}
+
+!0 = !{!0, !1}
+!1 = !{!"llvm.loop.isvectorized", i32 1}
 !2 = distinct !{!2, !1}
 !3 = distinct !{!3, !1}
 