|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
2 | | -; RUN: llc -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,NOB16B16 |
| 2 | +; RUN: llc -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,NOB16B16,NOB16B16-NONSTREAMING |
3 | 3 | ; RUN: llc -mattr=+sve,+bf16,+sve-b16b16 < %s | FileCheck %s --check-prefixes=CHECK,B16B16 |
4 | | -; RUN: llc -mattr=+sme,+sve-b16b16 -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,NOB16B16 |
| 4 | +; RUN: llc -mattr=+sme,+sve-b16b16 -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,NOB16B16,NOB16B16-STREAMING |
5 | 5 | ; RUN: llc -mattr=+sme2,+sve-b16b16 -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,B16B16 |
6 | 6 |
|
7 | 7 | target triple = "aarch64-unknown-linux-gnu" |
@@ -520,64 +520,82 @@ define <vscale x 8 x bfloat> @fmla_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x |
520 | 520 | ; |
521 | 521 |
|
522 | 522 | define <vscale x 2 x bfloat> @fmul_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { |
523 | | -; NOB16B16-LABEL: fmul_nxv2bf16: |
524 | | -; NOB16B16: // %bb.0: |
525 | | -; NOB16B16-NEXT: lsl z1.s, z1.s, #16 |
526 | | -; NOB16B16-NEXT: lsl z0.s, z0.s, #16 |
527 | | -; NOB16B16-NEXT: ptrue p0.d |
528 | | -; NOB16B16-NEXT: fmul z0.s, p0/m, z0.s, z1.s |
529 | | -; NOB16B16-NEXT: bfcvt z0.h, p0/m, z0.s |
530 | | -; NOB16B16-NEXT: ret |
| 523 | +; NOB16B16-NONSTREAMING-LABEL: fmul_nxv2bf16: |
| 524 | +; NOB16B16-NONSTREAMING: // %bb.0: |
| 525 | +; NOB16B16-NONSTREAMING-NEXT: movi v2.2d, #0000000000000000 |
| 526 | +; NOB16B16-NONSTREAMING-NEXT: ptrue p0.d |
| 527 | +; NOB16B16-NONSTREAMING-NEXT: bfmlalb z2.s, z0.h, z1.h |
| 528 | +; NOB16B16-NONSTREAMING-NEXT: bfcvt z0.h, p0/m, z2.s |
| 529 | +; NOB16B16-NONSTREAMING-NEXT: ret |
531 | 530 | ; |
532 | 531 | ; B16B16-LABEL: fmul_nxv2bf16: |
533 | 532 | ; B16B16: // %bb.0: |
534 | 533 | ; B16B16-NEXT: bfmul z0.h, z0.h, z1.h |
535 | 534 | ; B16B16-NEXT: ret |
| 535 | +; |
| 536 | +; NOB16B16-STREAMING-LABEL: fmul_nxv2bf16: |
| 537 | +; NOB16B16-STREAMING: // %bb.0: |
| 538 | +; NOB16B16-STREAMING-NEXT: mov z2.s, #0 // =0x0 |
| 539 | +; NOB16B16-STREAMING-NEXT: ptrue p0.d |
| 540 | +; NOB16B16-STREAMING-NEXT: bfmlalb z2.s, z0.h, z1.h |
| 541 | +; NOB16B16-STREAMING-NEXT: bfcvt z0.h, p0/m, z2.s |
| 542 | +; NOB16B16-STREAMING-NEXT: ret |
536 | 543 | %res = fmul <vscale x 2 x bfloat> %a, %b |
537 | 544 | ret <vscale x 2 x bfloat> %res |
538 | 545 | } |
539 | 546 |
|
540 | 547 | define <vscale x 4 x bfloat> @fmul_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { |
541 | | -; NOB16B16-LABEL: fmul_nxv4bf16: |
542 | | -; NOB16B16: // %bb.0: |
543 | | -; NOB16B16-NEXT: lsl z1.s, z1.s, #16 |
544 | | -; NOB16B16-NEXT: lsl z0.s, z0.s, #16 |
545 | | -; NOB16B16-NEXT: ptrue p0.s |
546 | | -; NOB16B16-NEXT: fmul z0.s, z0.s, z1.s |
547 | | -; NOB16B16-NEXT: bfcvt z0.h, p0/m, z0.s |
548 | | -; NOB16B16-NEXT: ret |
| 548 | +; NOB16B16-NONSTREAMING-LABEL: fmul_nxv4bf16: |
| 549 | +; NOB16B16-NONSTREAMING: // %bb.0: |
| 550 | +; NOB16B16-NONSTREAMING-NEXT: movi v2.2d, #0000000000000000 |
| 551 | +; NOB16B16-NONSTREAMING-NEXT: ptrue p0.s |
| 552 | +; NOB16B16-NONSTREAMING-NEXT: bfmlalb z2.s, z0.h, z1.h |
| 553 | +; NOB16B16-NONSTREAMING-NEXT: bfcvt z0.h, p0/m, z2.s |
| 554 | +; NOB16B16-NONSTREAMING-NEXT: ret |
549 | 555 | ; |
550 | 556 | ; B16B16-LABEL: fmul_nxv4bf16: |
551 | 557 | ; B16B16: // %bb.0: |
552 | 558 | ; B16B16-NEXT: bfmul z0.h, z0.h, z1.h |
553 | 559 | ; B16B16-NEXT: ret |
| 560 | +; |
| 561 | +; NOB16B16-STREAMING-LABEL: fmul_nxv4bf16: |
| 562 | +; NOB16B16-STREAMING: // %bb.0: |
| 563 | +; NOB16B16-STREAMING-NEXT: mov z2.s, #0 // =0x0 |
| 564 | +; NOB16B16-STREAMING-NEXT: ptrue p0.s |
| 565 | +; NOB16B16-STREAMING-NEXT: bfmlalb z2.s, z0.h, z1.h |
| 566 | +; NOB16B16-STREAMING-NEXT: bfcvt z0.h, p0/m, z2.s |
| 567 | +; NOB16B16-STREAMING-NEXT: ret |
554 | 568 | %res = fmul <vscale x 4 x bfloat> %a, %b |
555 | 569 | ret <vscale x 4 x bfloat> %res |
556 | 570 | } |
557 | 571 |
|
558 | 572 | define <vscale x 8 x bfloat> @fmul_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { |
559 | | -; NOB16B16-LABEL: fmul_nxv8bf16: |
560 | | -; NOB16B16: // %bb.0: |
561 | | -; NOB16B16-NEXT: uunpkhi z2.s, z1.h |
562 | | -; NOB16B16-NEXT: uunpkhi z3.s, z0.h |
563 | | -; NOB16B16-NEXT: uunpklo z1.s, z1.h |
564 | | -; NOB16B16-NEXT: uunpklo z0.s, z0.h |
565 | | -; NOB16B16-NEXT: ptrue p0.s |
566 | | -; NOB16B16-NEXT: lsl z2.s, z2.s, #16 |
567 | | -; NOB16B16-NEXT: lsl z3.s, z3.s, #16 |
568 | | -; NOB16B16-NEXT: lsl z1.s, z1.s, #16 |
569 | | -; NOB16B16-NEXT: lsl z0.s, z0.s, #16 |
570 | | -; NOB16B16-NEXT: fmul z2.s, z3.s, z2.s |
571 | | -; NOB16B16-NEXT: fmul z0.s, z0.s, z1.s |
572 | | -; NOB16B16-NEXT: bfcvt z1.h, p0/m, z2.s |
573 | | -; NOB16B16-NEXT: bfcvt z0.h, p0/m, z0.s |
574 | | -; NOB16B16-NEXT: uzp1 z0.h, z0.h, z1.h |
575 | | -; NOB16B16-NEXT: ret |
| 573 | +; NOB16B16-NONSTREAMING-LABEL: fmul_nxv8bf16: |
| 574 | +; NOB16B16-NONSTREAMING: // %bb.0: |
| 575 | +; NOB16B16-NONSTREAMING-NEXT: movi v2.2d, #0000000000000000 |
| 576 | +; NOB16B16-NONSTREAMING-NEXT: movi v3.2d, #0000000000000000 |
| 577 | +; NOB16B16-NONSTREAMING-NEXT: ptrue p0.s |
| 578 | +; NOB16B16-NONSTREAMING-NEXT: bfmlalb z2.s, z0.h, z1.h |
| 579 | +; NOB16B16-NONSTREAMING-NEXT: bfmlalt z3.s, z0.h, z1.h |
| 580 | +; NOB16B16-NONSTREAMING-NEXT: bfcvt z0.h, p0/m, z2.s |
| 581 | +; NOB16B16-NONSTREAMING-NEXT: bfcvtnt z0.h, p0/m, z3.s |
| 582 | +; NOB16B16-NONSTREAMING-NEXT: ret |
576 | 583 | ; |
577 | 584 | ; B16B16-LABEL: fmul_nxv8bf16: |
578 | 585 | ; B16B16: // %bb.0: |
579 | 586 | ; B16B16-NEXT: bfmul z0.h, z0.h, z1.h |
580 | 587 | ; B16B16-NEXT: ret |
| 588 | +; |
| 589 | +; NOB16B16-STREAMING-LABEL: fmul_nxv8bf16: |
| 590 | +; NOB16B16-STREAMING: // %bb.0: |
| 591 | +; NOB16B16-STREAMING-NEXT: mov z2.s, #0 // =0x0 |
| 592 | +; NOB16B16-STREAMING-NEXT: mov z3.s, #0 // =0x0 |
| 593 | +; NOB16B16-STREAMING-NEXT: ptrue p0.s |
| 594 | +; NOB16B16-STREAMING-NEXT: bfmlalb z2.s, z0.h, z1.h |
| 595 | +; NOB16B16-STREAMING-NEXT: bfmlalt z3.s, z0.h, z1.h |
| 596 | +; NOB16B16-STREAMING-NEXT: bfcvt z0.h, p0/m, z2.s |
| 597 | +; NOB16B16-STREAMING-NEXT: bfcvtnt z0.h, p0/m, z3.s |
| 598 | +; NOB16B16-STREAMING-NEXT: ret |
581 | 599 | %res = fmul <vscale x 8 x bfloat> %a, %b |
582 | 600 | ret <vscale x 8 x bfloat> %res |
583 | 601 | } |
|
0 commit comments