|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
2 | 2 | ; RUN: llc -mattr=+dotprod,+i8mm -aarch64-enable-partial-reduce-nodes=true < %s | FileCheck %s --check-prefixes=COMMON,NEON |
3 | 3 | ; RUN: llc -mattr=+sve,+dotprod,+i8mm -aarch64-enable-partial-reduce-nodes=true < %s | FileCheck %s --check-prefixes=COMMON,SVE |
4 | | -; RUN: llc -mattr=+sme -aarch64-enable-partial-reduce-nodes=true -force-streaming < %s | FileCheck %s --check-prefix=SME |
| 4 | +; RUN: llc -mattr=+sme,+i8mm -aarch64-enable-partial-reduce-nodes=true -force-streaming < %s | FileCheck %s --check-prefix=SME |
5 | 5 |
|
6 | 6 | target triple = "aarch64" |
7 | 7 |
|
@@ -418,23 +418,10 @@ define <4 x i32> @four_way_i8_i32_vl128_usdot(ptr %accptr, ptr %uptr, ptr %sptr) |
418 | 418 | ; |
419 | 419 | ; SME-LABEL: four_way_i8_i32_vl128_usdot: |
420 | 420 | ; SME: // %bb.0: |
421 | | -; SME-NEXT: ptrue p0.s, vl4 |
422 | | -; SME-NEXT: ldr q2, [x0] |
423 | | -; SME-NEXT: mov w8, #4 // =0x4 |
424 | | -; SME-NEXT: ld1b { z0.s }, p0/z, [x1] |
425 | | -; SME-NEXT: ld1sb { z1.s }, p0/z, [x2] |
426 | | -; SME-NEXT: mad z0.s, p0/m, z1.s, z2.s |
427 | | -; SME-NEXT: ld1b { z1.s }, p0/z, [x1, x8] |
428 | | -; SME-NEXT: ld1sb { z2.s }, p0/z, [x2, x8] |
429 | | -; SME-NEXT: mov w8, #8 // =0x8 |
430 | | -; SME-NEXT: mla z0.s, p0/m, z2.s, z1.s |
431 | | -; SME-NEXT: ld1b { z1.s }, p0/z, [x1, x8] |
432 | | -; SME-NEXT: ld1sb { z2.s }, p0/z, [x2, x8] |
433 | | -; SME-NEXT: mov w8, #12 // =0xc |
434 | | -; SME-NEXT: mla z0.s, p0/m, z2.s, z1.s |
435 | | -; SME-NEXT: ld1b { z1.s }, p0/z, [x1, x8] |
436 | | -; SME-NEXT: ld1sb { z2.s }, p0/z, [x2, x8] |
437 | | -; SME-NEXT: mla z0.s, p0/m, z2.s, z1.s |
| 421 | +; SME-NEXT: ldr q0, [x0] |
| 422 | +; SME-NEXT: ldr q1, [x1] |
| 423 | +; SME-NEXT: ldr q2, [x2] |
| 424 | +; SME-NEXT: usdot z0.s, z1.b, z2.b |
438 | 425 | ; SME-NEXT: // kill: def $q0 killed $q0 killed $z0 |
439 | 426 | ; SME-NEXT: ret |
440 | 427 | %acc = load <4 x i32>, ptr %accptr |
@@ -491,41 +478,11 @@ define <8 x i32> @four_way_i8_i32_vl128_double_width_usdot(ptr %accptr, ptr %upt |
491 | 478 | ; |
492 | 479 | ; SME-LABEL: four_way_i8_i32_vl128_double_width_usdot: |
493 | 480 | ; SME: // %bb.0: |
494 | | -; SME-NEXT: ptrue p0.s, vl4 |
495 | | -; SME-NEXT: mov w8, #16 // =0x10 |
496 | | -; SME-NEXT: mov w9, #4 // =0x4 |
497 | | -; SME-NEXT: ldp q5, q4, [x0] |
498 | | -; SME-NEXT: ld1b { z0.s }, p0/z, [x1, x8] |
499 | | -; SME-NEXT: ld1b { z1.s }, p0/z, [x1] |
500 | | -; SME-NEXT: ld1sb { z2.s }, p0/z, [x2, x8] |
501 | | -; SME-NEXT: ld1sb { z3.s }, p0/z, [x2] |
502 | | -; SME-NEXT: mov w8, #20 // =0x14 |
503 | | -; SME-NEXT: ld1b { z6.s }, p0/z, [x1, x8] |
504 | | -; SME-NEXT: mad z0.s, p0/m, z2.s, z4.s |
505 | | -; SME-NEXT: ld1b { z2.s }, p0/z, [x1, x9] |
506 | | -; SME-NEXT: ld1sb { z4.s }, p0/z, [x2, x9] |
507 | | -; SME-NEXT: mad z1.s, p0/m, z3.s, z5.s |
508 | | -; SME-NEXT: ld1sb { z3.s }, p0/z, [x2, x8] |
509 | | -; SME-NEXT: mov w8, #24 // =0x18 |
510 | | -; SME-NEXT: mov w9, #8 // =0x8 |
511 | | -; SME-NEXT: ld1b { z5.s }, p0/z, [x1, x8] |
512 | | -; SME-NEXT: mla z0.s, p0/m, z3.s, z6.s |
513 | | -; SME-NEXT: ld1sb { z3.s }, p0/z, [x2, x8] |
514 | | -; SME-NEXT: mov w8, #28 // =0x1c |
515 | | -; SME-NEXT: mla z1.s, p0/m, z4.s, z2.s |
516 | | -; SME-NEXT: ld1b { z2.s }, p0/z, [x1, x9] |
517 | | -; SME-NEXT: ld1sb { z4.s }, p0/z, [x2, x9] |
518 | | -; SME-NEXT: mov w9, #12 // =0xc |
519 | | -; SME-NEXT: ld1b { z6.s }, p0/z, [x1, x8] |
520 | | -; SME-NEXT: mla z1.s, p0/m, z4.s, z2.s |
521 | | -; SME-NEXT: movprfx z2, z0 |
522 | | -; SME-NEXT: mla z2.s, p0/m, z3.s, z5.s |
523 | | -; SME-NEXT: ld1b { z0.s }, p0/z, [x1, x9] |
524 | | -; SME-NEXT: ld1sb { z3.s }, p0/z, [x2, x8] |
525 | | -; SME-NEXT: ld1sb { z4.s }, p0/z, [x2, x9] |
526 | | -; SME-NEXT: mad z0.s, p0/m, z4.s, z1.s |
527 | | -; SME-NEXT: movprfx z1, z2 |
528 | | -; SME-NEXT: mla z1.s, p0/m, z3.s, z6.s |
| 481 | +; SME-NEXT: ldp q0, q1, [x0] |
| 482 | +; SME-NEXT: ldp q3, q2, [x1] |
| 483 | +; SME-NEXT: ldp q5, q4, [x2] |
| 484 | +; SME-NEXT: usdot z0.s, z3.b, z5.b |
| 485 | +; SME-NEXT: usdot z1.s, z2.b, z4.b |
529 | 486 | ; SME-NEXT: // kill: def $q0 killed $q0 killed $z0 |
530 | 487 | ; SME-NEXT: // kill: def $q1 killed $q1 killed $z1 |
531 | 488 | ; SME-NEXT: ret |
@@ -610,20 +567,10 @@ define <8 x i32> @four_way_i8_i32_vl256_usdot(ptr %accptr, ptr %uptr, ptr %sptr) |
610 | 567 | ; |
611 | 568 | ; SME-LABEL: four_way_i8_i32_vl256_usdot: |
612 | 569 | ; SME: // %bb.0: |
613 | | -; SME-NEXT: ptrue p0.s |
614 | 570 | ; SME-NEXT: ldr z0, [x0] |
615 | | -; SME-NEXT: ld1b { z1.s }, p0/z, [x1] |
616 | | -; SME-NEXT: ld1sb { z2.s }, p0/z, [x2] |
617 | | -; SME-NEXT: mla z0.s, p0/m, z2.s, z1.s |
618 | | -; SME-NEXT: ld1b { z1.s }, p0/z, [x1, #1, mul vl] |
619 | | -; SME-NEXT: ld1sb { z2.s }, p0/z, [x2, #1, mul vl] |
620 | | -; SME-NEXT: mla z0.s, p0/m, z2.s, z1.s |
621 | | -; SME-NEXT: ld1b { z1.s }, p0/z, [x1, #2, mul vl] |
622 | | -; SME-NEXT: ld1sb { z2.s }, p0/z, [x2, #2, mul vl] |
623 | | -; SME-NEXT: mla z0.s, p0/m, z2.s, z1.s |
624 | | -; SME-NEXT: ld1b { z1.s }, p0/z, [x1, #3, mul vl] |
625 | | -; SME-NEXT: ld1sb { z2.s }, p0/z, [x2, #3, mul vl] |
626 | | -; SME-NEXT: mla z0.s, p0/m, z2.s, z1.s |
| 571 | +; SME-NEXT: ldr z1, [x1] |
| 572 | +; SME-NEXT: ldr z2, [x2] |
| 573 | +; SME-NEXT: usdot z0.s, z1.b, z2.b |
627 | 574 | ; SME-NEXT: mov z1.d, z0.d |
628 | 575 | ; SME-NEXT: ext z1.b, z1.b, z0.b, #16 |
629 | 576 | ; SME-NEXT: // kill: def $q0 killed $q0 killed $z0 |
|
0 commit comments