|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
2 | 2 | ; RUN: llc -mtriple aarch64 -mattr=+neon,+dotprod < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DOT,CHECK-NOI8MM |
3 | 3 | ; RUN: llc -mtriple aarch64 -mattr=+neon < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOI8MM,CHECK-NODOT |
4 | | -; RUN: llc -mtriple aarch64 -mattr=+neon,+dotprod,+i8mm < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DOT,CHECK-I8MM |
| 4 | +; RUN: llc -mtriple aarch64 -mattr=+neon,+dotprod,+i8mm < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DOT,CHECK-I8MM,CHECK-DOT-I8MM |
5 | 5 | ; RUN: llc -mtriple aarch64 -mattr=+neon,+dotprod,+i8mm -aarch64-enable-partial-reduce-nodes < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DOT,CHECK-I8MM,CHECK-NEWLOWERING-I8MM |
6 | 6 | ; RUN: llc -mtriple aarch64 -mattr=+neon,+dotprod -aarch64-enable-partial-reduce-nodes < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DOT,CHECK-NOI8MM,CHECK-NEWLOWERING-NOI8MM |
7 | 7 |
|
@@ -175,6 +175,11 @@ define <4 x i32> @usdot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) { |
175 | 175 | ; CHECK-NOI8MM-NEXT: smlal2 v0.4s, v2.8h, v1.8h |
176 | 176 | ; CHECK-NOI8MM-NEXT: ret |
177 | 177 | ; |
| 178 | +; CHECK-DOT-I8MM-LABEL: usdot: |
| 179 | +; CHECK-DOT-I8MM: // %bb.0: |
| 180 | +; CHECK-DOT-I8MM-NEXT: usdot v0.4s, v1.16b, v2.16b |
| 181 | +; CHECK-DOT-I8MM-NEXT: ret |
| 182 | +; |
178 | 183 | ; CHECK-NEWLOWERING-I8MM-LABEL: usdot: |
179 | 184 | ; CHECK-NEWLOWERING-I8MM: // %bb.0: |
180 | 185 | ; CHECK-NEWLOWERING-I8MM-NEXT: ushll v3.8h, v1.8b, #0 |
@@ -217,6 +222,22 @@ define <4 x i32> @usdot_in_loop(ptr %p1, ptr %p2){ |
217 | 222 | ; CHECK-NOI8MM-NEXT: // %bb.2: // %end |
218 | 223 | ; CHECK-NOI8MM-NEXT: ret |
219 | 224 | ; |
| 225 | +; CHECK-DOT-I8MM-LABEL: usdot_in_loop: |
| 226 | +; CHECK-DOT-I8MM: // %bb.0: // %entry |
| 227 | +; CHECK-DOT-I8MM-NEXT: movi v1.2d, #0000000000000000 |
| 228 | +; CHECK-DOT-I8MM-NEXT: mov x8, xzr |
| 229 | +; CHECK-DOT-I8MM-NEXT: .LBB6_1: // %vector.body |
| 230 | +; CHECK-DOT-I8MM-NEXT: // =>This Inner Loop Header: Depth=1 |
| 231 | +; CHECK-DOT-I8MM-NEXT: ldr q2, [x0, x8] |
| 232 | +; CHECK-DOT-I8MM-NEXT: ldr q3, [x1, x8] |
| 233 | +; CHECK-DOT-I8MM-NEXT: mov v0.16b, v1.16b |
| 234 | +; CHECK-DOT-I8MM-NEXT: add x8, x8, #16 |
| 235 | +; CHECK-DOT-I8MM-NEXT: usdot v1.4s, v3.16b, v2.16b |
| 236 | +; CHECK-DOT-I8MM-NEXT: cmp x8, #16 |
| 237 | +; CHECK-DOT-I8MM-NEXT: b.ne .LBB6_1 |
| 238 | +; CHECK-DOT-I8MM-NEXT: // %bb.2: // %end |
| 239 | +; CHECK-DOT-I8MM-NEXT: ret |
| 240 | +; |
220 | 241 | ; CHECK-NEWLOWERING-I8MM-LABEL: usdot_in_loop: |
221 | 242 | ; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry |
222 | 243 | ; CHECK-NEWLOWERING-I8MM-NEXT: movi v1.2d, #0000000000000000 |
@@ -279,6 +300,11 @@ define <2 x i32> @usdot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) #0{ |
279 | 300 | ; CHECK-NOI8MM-NEXT: add v0.2s, v1.2s, v0.2s |
280 | 301 | ; CHECK-NOI8MM-NEXT: ret |
281 | 302 | ; |
| 303 | +; CHECK-DOT-I8MM-LABEL: usdot_narrow: |
| 304 | +; CHECK-DOT-I8MM: // %bb.0: |
| 305 | +; CHECK-DOT-I8MM-NEXT: usdot v0.2s, v1.8b, v2.8b |
| 306 | +; CHECK-DOT-I8MM-NEXT: ret |
| 307 | +; |
282 | 308 | ; CHECK-NEWLOWERING-I8MM-LABEL: usdot_narrow: |
283 | 309 | ; CHECK-NEWLOWERING-I8MM: // %bb.0: |
284 | 310 | ; CHECK-NEWLOWERING-I8MM-NEXT: ushll v1.8h, v1.8b, #0 |
@@ -315,6 +341,11 @@ define <4 x i32> @sudot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) #0{ |
315 | 341 | ; CHECK-NOI8MM-NEXT: smlal2 v0.4s, v2.8h, v1.8h |
316 | 342 | ; CHECK-NOI8MM-NEXT: ret |
317 | 343 | ; |
| 344 | +; CHECK-DOT-I8MM-LABEL: sudot: |
| 345 | +; CHECK-DOT-I8MM: // %bb.0: |
| 346 | +; CHECK-DOT-I8MM-NEXT: usdot v0.4s, v2.16b, v1.16b |
| 347 | +; CHECK-DOT-I8MM-NEXT: ret |
| 348 | +; |
318 | 349 | ; CHECK-NEWLOWERING-I8MM-LABEL: sudot: |
319 | 350 | ; CHECK-NEWLOWERING-I8MM: // %bb.0: |
320 | 351 | ; CHECK-NEWLOWERING-I8MM-NEXT: sshll v3.8h, v1.8b, #0 |
@@ -357,6 +388,22 @@ define <4 x i32> @sudot_in_loop(ptr %p1, ptr %p2){ |
357 | 388 | ; CHECK-NOI8MM-NEXT: // %bb.2: // %end |
358 | 389 | ; CHECK-NOI8MM-NEXT: ret |
359 | 390 | ; |
| 391 | +; CHECK-DOT-I8MM-LABEL: sudot_in_loop: |
| 392 | +; CHECK-DOT-I8MM: // %bb.0: // %entry |
| 393 | +; CHECK-DOT-I8MM-NEXT: movi v1.2d, #0000000000000000 |
| 394 | +; CHECK-DOT-I8MM-NEXT: mov x8, xzr |
| 395 | +; CHECK-DOT-I8MM-NEXT: .LBB9_1: // %vector.body |
| 396 | +; CHECK-DOT-I8MM-NEXT: // =>This Inner Loop Header: Depth=1 |
| 397 | +; CHECK-DOT-I8MM-NEXT: ldr q2, [x0, x8] |
| 398 | +; CHECK-DOT-I8MM-NEXT: ldr q3, [x1, x8] |
| 399 | +; CHECK-DOT-I8MM-NEXT: mov v0.16b, v1.16b |
| 400 | +; CHECK-DOT-I8MM-NEXT: add x8, x8, #16 |
| 401 | +; CHECK-DOT-I8MM-NEXT: usdot v1.4s, v2.16b, v3.16b |
| 402 | +; CHECK-DOT-I8MM-NEXT: cmp x8, #16 |
| 403 | +; CHECK-DOT-I8MM-NEXT: b.ne .LBB9_1 |
| 404 | +; CHECK-DOT-I8MM-NEXT: // %bb.2: // %end |
| 405 | +; CHECK-DOT-I8MM-NEXT: ret |
| 406 | +; |
360 | 407 | ; CHECK-NEWLOWERING-I8MM-LABEL: sudot_in_loop: |
361 | 408 | ; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry |
362 | 409 | ; CHECK-NEWLOWERING-I8MM-NEXT: movi v1.2d, #0000000000000000 |
@@ -419,6 +466,11 @@ define <2 x i32> @sudot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) #0{ |
419 | 466 | ; CHECK-NOI8MM-NEXT: add v0.2s, v1.2s, v0.2s |
420 | 467 | ; CHECK-NOI8MM-NEXT: ret |
421 | 468 | ; |
| 469 | +; CHECK-DOT-I8MM-LABEL: sudot_narrow: |
| 470 | +; CHECK-DOT-I8MM: // %bb.0: |
| 471 | +; CHECK-DOT-I8MM-NEXT: usdot v0.2s, v2.8b, v1.8b |
| 472 | +; CHECK-DOT-I8MM-NEXT: ret |
| 473 | +; |
422 | 474 | ; CHECK-NEWLOWERING-I8MM-LABEL: sudot_narrow: |
423 | 475 | ; CHECK-NEWLOWERING-I8MM: // %bb.0: |
424 | 476 | ; CHECK-NEWLOWERING-I8MM-NEXT: sshll v1.8h, v1.8b, #0 |
@@ -461,6 +513,14 @@ define <4 x i64> @udot_8to64(<4 x i64> %acc, <16 x i8> %a, <16 x i8> %b) { |
461 | 513 | ; CHECK-NODOT-NEXT: uaddw2 v0.2d, v0.2d, v4.4s |
462 | 514 | ; CHECK-NODOT-NEXT: ret |
463 | 515 | ; |
| 516 | +; CHECK-DOT-I8MM-LABEL: udot_8to64: |
| 517 | +; CHECK-DOT-I8MM: // %bb.0: // %entry |
| 518 | +; CHECK-DOT-I8MM-NEXT: movi v4.2d, #0000000000000000 |
| 519 | +; CHECK-DOT-I8MM-NEXT: udot v4.4s, v2.16b, v3.16b |
| 520 | +; CHECK-DOT-I8MM-NEXT: saddw2 v1.2d, v1.2d, v4.4s |
| 521 | +; CHECK-DOT-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s |
| 522 | +; CHECK-DOT-I8MM-NEXT: ret |
| 523 | +; |
464 | 524 | ; CHECK-NEWLOWERING-I8MM-LABEL: udot_8to64: |
465 | 525 | ; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry |
466 | 526 | ; CHECK-NEWLOWERING-I8MM-NEXT: movi v4.2d, #0000000000000000 |
@@ -504,6 +564,14 @@ define <4 x i64> @sdot_8to64(<4 x i64> %acc, <16 x i8> %a, <16 x i8> %b){ |
504 | 564 | ; CHECK-NODOT-NEXT: saddw2 v0.2d, v0.2d, v4.4s |
505 | 565 | ; CHECK-NODOT-NEXT: ret |
506 | 566 | ; |
| 567 | +; CHECK-DOT-I8MM-LABEL: sdot_8to64: |
| 568 | +; CHECK-DOT-I8MM: // %bb.0: // %entry |
| 569 | +; CHECK-DOT-I8MM-NEXT: movi v4.2d, #0000000000000000 |
| 570 | +; CHECK-DOT-I8MM-NEXT: sdot v4.4s, v2.16b, v3.16b |
| 571 | +; CHECK-DOT-I8MM-NEXT: saddw2 v1.2d, v1.2d, v4.4s |
| 572 | +; CHECK-DOT-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s |
| 573 | +; CHECK-DOT-I8MM-NEXT: ret |
| 574 | +; |
507 | 575 | ; CHECK-NEWLOWERING-I8MM-LABEL: sdot_8to64: |
508 | 576 | ; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry |
509 | 577 | ; CHECK-NEWLOWERING-I8MM-NEXT: movi v4.2d, #0000000000000000 |
@@ -553,6 +621,14 @@ define <4 x i64> @usdot_8to64(<4 x i64> %acc, <16 x i8> %a, <16 x i8> %b){ |
553 | 621 | ; CHECK-NOI8MM-NEXT: smlal2 v1.2d, v2.4s, v3.4s |
554 | 622 | ; CHECK-NOI8MM-NEXT: ret |
555 | 623 | ; |
| 624 | +; CHECK-DOT-I8MM-LABEL: usdot_8to64: |
| 625 | +; CHECK-DOT-I8MM: // %bb.0: // %entry |
| 626 | +; CHECK-DOT-I8MM-NEXT: movi v4.2d, #0000000000000000 |
| 627 | +; CHECK-DOT-I8MM-NEXT: usdot v4.4s, v2.16b, v3.16b |
| 628 | +; CHECK-DOT-I8MM-NEXT: saddw2 v1.2d, v1.2d, v4.4s |
| 629 | +; CHECK-DOT-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s |
| 630 | +; CHECK-DOT-I8MM-NEXT: ret |
| 631 | +; |
556 | 632 | ; CHECK-NEWLOWERING-I8MM-LABEL: usdot_8to64: |
557 | 633 | ; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry |
558 | 634 | ; CHECK-NEWLOWERING-I8MM-NEXT: ushll v4.8h, v2.8b, #0 |
@@ -610,6 +686,14 @@ define <4 x i64> @sudot_8to64(<4 x i64> %acc, <16 x i8> %a, <16 x i8> %b) { |
610 | 686 | ; CHECK-NOI8MM-NEXT: smlal2 v1.2d, v2.4s, v3.4s |
611 | 687 | ; CHECK-NOI8MM-NEXT: ret |
612 | 688 | ; |
| 689 | +; CHECK-DOT-I8MM-LABEL: sudot_8to64: |
| 690 | +; CHECK-DOT-I8MM: // %bb.0: // %entry |
| 691 | +; CHECK-DOT-I8MM-NEXT: movi v4.2d, #0000000000000000 |
| 692 | +; CHECK-DOT-I8MM-NEXT: usdot v4.4s, v3.16b, v2.16b |
| 693 | +; CHECK-DOT-I8MM-NEXT: saddw2 v1.2d, v1.2d, v4.4s |
| 694 | +; CHECK-DOT-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s |
| 695 | +; CHECK-DOT-I8MM-NEXT: ret |
| 696 | +; |
613 | 697 | ; CHECK-NEWLOWERING-I8MM-LABEL: sudot_8to64: |
614 | 698 | ; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry |
615 | 699 | ; CHECK-NEWLOWERING-I8MM-NEXT: sshll v4.8h, v2.8b, #0 |
@@ -684,6 +768,22 @@ define <4 x i32> @udot_no_bin_op_in_loop(ptr %p){ |
684 | 768 | ; CHECK-NODOT-NEXT: // %bb.2: // %end |
685 | 769 | ; CHECK-NODOT-NEXT: ret |
686 | 770 | ; |
| 771 | +; CHECK-DOT-I8MM-LABEL: udot_no_bin_op_in_loop: |
| 772 | +; CHECK-DOT-I8MM: // %bb.0: // %entry |
| 773 | +; CHECK-DOT-I8MM-NEXT: movi v1.2d, #0000000000000000 |
| 774 | +; CHECK-DOT-I8MM-NEXT: movi v2.16b, #1 |
| 775 | +; CHECK-DOT-I8MM-NEXT: mov x8, xzr |
| 776 | +; CHECK-DOT-I8MM-NEXT: .LBB16_1: // %vector.body |
| 777 | +; CHECK-DOT-I8MM-NEXT: // =>This Inner Loop Header: Depth=1 |
| 778 | +; CHECK-DOT-I8MM-NEXT: ldr q3, [x0, x8] |
| 779 | +; CHECK-DOT-I8MM-NEXT: mov v0.16b, v1.16b |
| 780 | +; CHECK-DOT-I8MM-NEXT: add x8, x8, #16 |
| 781 | +; CHECK-DOT-I8MM-NEXT: cmp x8, #16 |
| 782 | +; CHECK-DOT-I8MM-NEXT: udot v1.4s, v3.16b, v2.16b |
| 783 | +; CHECK-DOT-I8MM-NEXT: b.ne .LBB16_1 |
| 784 | +; CHECK-DOT-I8MM-NEXT: // %bb.2: // %end |
| 785 | +; CHECK-DOT-I8MM-NEXT: ret |
| 786 | +; |
687 | 787 | ; CHECK-NEWLOWERING-I8MM-LABEL: udot_no_bin_op_in_loop: |
688 | 788 | ; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry |
689 | 789 | ; CHECK-NEWLOWERING-I8MM-NEXT: adrp x8, .LCPI16_0 |
@@ -854,6 +954,15 @@ define <4 x i64> @udot_no_bin_op_8to64(<4 x i64> %acc, <16 x i8> %a){ |
854 | 954 | ; CHECK-NODOT-NEXT: uaddw2 v0.2d, v0.2d, v3.4s |
855 | 955 | ; CHECK-NODOT-NEXT: ret |
856 | 956 | ; |
| 957 | +; CHECK-DOT-I8MM-LABEL: udot_no_bin_op_8to64: |
| 958 | +; CHECK-DOT-I8MM: // %bb.0: |
| 959 | +; CHECK-DOT-I8MM-NEXT: movi v3.16b, #1 |
| 960 | +; CHECK-DOT-I8MM-NEXT: movi v4.2d, #0000000000000000 |
| 961 | +; CHECK-DOT-I8MM-NEXT: udot v4.4s, v2.16b, v3.16b |
| 962 | +; CHECK-DOT-I8MM-NEXT: saddw2 v1.2d, v1.2d, v4.4s |
| 963 | +; CHECK-DOT-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s |
| 964 | +; CHECK-DOT-I8MM-NEXT: ret |
| 965 | +; |
857 | 966 | ; CHECK-NEWLOWERING-I8MM-LABEL: udot_no_bin_op_8to64: |
858 | 967 | ; CHECK-NEWLOWERING-I8MM: // %bb.0: |
859 | 968 | ; CHECK-NEWLOWERING-I8MM-NEXT: ushll v3.8h, v2.8b, #0 |
@@ -913,6 +1022,15 @@ define <4 x i64> @sdot_no_bin_op_8to64(<4 x i64> %acc, <16 x i8> %a){ |
913 | 1022 | ; CHECK-NODOT-NEXT: saddw2 v0.2d, v0.2d, v3.4s |
914 | 1023 | ; CHECK-NODOT-NEXT: ret |
915 | 1024 | ; |
| 1025 | +; CHECK-DOT-I8MM-LABEL: sdot_no_bin_op_8to64: |
| 1026 | +; CHECK-DOT-I8MM: // %bb.0: |
| 1027 | +; CHECK-DOT-I8MM-NEXT: movi v3.16b, #1 |
| 1028 | +; CHECK-DOT-I8MM-NEXT: movi v4.2d, #0000000000000000 |
| 1029 | +; CHECK-DOT-I8MM-NEXT: sdot v4.4s, v2.16b, v3.16b |
| 1030 | +; CHECK-DOT-I8MM-NEXT: saddw2 v1.2d, v1.2d, v4.4s |
| 1031 | +; CHECK-DOT-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s |
| 1032 | +; CHECK-DOT-I8MM-NEXT: ret |
| 1033 | +; |
916 | 1034 | ; CHECK-NEWLOWERING-I8MM-LABEL: sdot_no_bin_op_8to64: |
917 | 1035 | ; CHECK-NEWLOWERING-I8MM: // %bb.0: |
918 | 1036 | ; CHECK-NEWLOWERING-I8MM-NEXT: sshll v3.8h, v2.8b, #0 |
@@ -1101,6 +1219,25 @@ define <4 x i32> @usdot_multiple_zext_users(ptr %p1, ptr %p2, ptr %p3) { |
1101 | 1219 | ; CHECK-NOI8MM-NEXT: add v0.4s, v1.4s, v0.4s |
1102 | 1220 | ; CHECK-NOI8MM-NEXT: ret |
1103 | 1221 | ; |
| 1222 | +; CHECK-DOT-I8MM-LABEL: usdot_multiple_zext_users: |
| 1223 | +; CHECK-DOT-I8MM: // %bb.0: // %entry |
| 1224 | +; CHECK-DOT-I8MM-NEXT: movi v0.2d, #0000000000000000 |
| 1225 | +; CHECK-DOT-I8MM-NEXT: movi v1.2d, #0000000000000000 |
| 1226 | +; CHECK-DOT-I8MM-NEXT: mov x8, xzr |
| 1227 | +; CHECK-DOT-I8MM-NEXT: .LBB28_1: // %vector.body |
| 1228 | +; CHECK-DOT-I8MM-NEXT: // =>This Inner Loop Header: Depth=1 |
| 1229 | +; CHECK-DOT-I8MM-NEXT: ldr q2, [x0, x8] |
| 1230 | +; CHECK-DOT-I8MM-NEXT: ldr q3, [x1, x8] |
| 1231 | +; CHECK-DOT-I8MM-NEXT: ldr q4, [x2, x8] |
| 1232 | +; CHECK-DOT-I8MM-NEXT: add x8, x8, #16 |
| 1233 | +; CHECK-DOT-I8MM-NEXT: usdot v0.4s, v4.16b, v2.16b |
| 1234 | +; CHECK-DOT-I8MM-NEXT: usdot v1.4s, v4.16b, v3.16b |
| 1235 | +; CHECK-DOT-I8MM-NEXT: cmp x8, #1024 |
| 1236 | +; CHECK-DOT-I8MM-NEXT: b.ne .LBB28_1 |
| 1237 | +; CHECK-DOT-I8MM-NEXT: // %bb.2: // %end |
| 1238 | +; CHECK-DOT-I8MM-NEXT: add v0.4s, v1.4s, v0.4s |
| 1239 | +; CHECK-DOT-I8MM-NEXT: ret |
| 1240 | +; |
1104 | 1241 | ; CHECK-NEWLOWERING-I8MM-LABEL: usdot_multiple_zext_users: |
1105 | 1242 | ; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry |
1106 | 1243 | ; CHECK-NEWLOWERING-I8MM-NEXT: movi v0.2d, #0000000000000000 |
|
0 commit comments