|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
2 | | -; RUN: llc -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,NOB16B16 |
| 2 | +; RUN: llc -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,NOB16B16,NOB16B16-NONSTREAMING |
3 | 3 | ; RUN: llc -mattr=+sve,+bf16,+sve-b16b16 < %s | FileCheck %s --check-prefixes=CHECK,B16B16 |
4 | | -; RUN: llc -mattr=+sme,+sve-b16b16 -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,NOB16B16 |
| 4 | +; RUN: llc -mattr=+sme,+sve-b16b16 -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,NOB16B16,NOB16B16-STREAMING |
5 | 5 | ; RUN: llc -mattr=+sme2,+sve-b16b16 -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,B16B16 |
6 | 6 |
|
7 | 7 | target triple = "aarch64-unknown-linux-gnu" |
@@ -530,49 +530,80 @@ define <vscale x 2 x bfloat> @fmul_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x |
530 | 530 | ; B16B16-NEXT: ptrue p0.d |
531 | 531 | ; B16B16-NEXT: bfmul z0.h, p0/m, z0.h, z1.h |
532 | 532 | ; B16B16-NEXT: ret |
533 | | - %res = fmul <vscale x 2 x bfloat> %a, %b |
| 533 | + %res = fmul nsz <vscale x 2 x bfloat> %a, %b |
534 | 534 | ret <vscale x 2 x bfloat> %res |
535 | 535 | } |
536 | 536 |
|
537 | 537 | define <vscale x 4 x bfloat> @fmul_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { |
538 | | -; NOB16B16-LABEL: fmul_nxv4bf16: |
539 | | -; NOB16B16: // %bb.0: |
540 | | -; NOB16B16-NEXT: lsl z1.s, z1.s, #16 |
541 | | -; NOB16B16-NEXT: lsl z0.s, z0.s, #16 |
542 | | -; NOB16B16-NEXT: ptrue p0.s |
543 | | -; NOB16B16-NEXT: fmul z0.s, z0.s, z1.s |
544 | | -; NOB16B16-NEXT: bfcvt z0.h, p0/m, z0.s |
545 | | -; NOB16B16-NEXT: ret |
| 538 | +; NOB16B16-NONSTREAMING-LABEL: fmul_nxv4bf16: |
| 539 | +; NOB16B16-NONSTREAMING: // %bb.0: |
| 540 | +; NOB16B16-NONSTREAMING-NEXT: movi v2.2d, #0000000000000000 |
| 541 | +; NOB16B16-NONSTREAMING-NEXT: ptrue p0.s |
| 542 | +; NOB16B16-NONSTREAMING-NEXT: bfmlalb z2.s, z0.h, z1.h |
| 543 | +; NOB16B16-NONSTREAMING-NEXT: bfcvt z0.h, p0/m, z2.s |
| 544 | +; NOB16B16-NONSTREAMING-NEXT: ret |
546 | 545 | ; |
547 | 546 | ; B16B16-LABEL: fmul_nxv4bf16: |
548 | 547 | ; B16B16: // %bb.0: |
549 | 548 | ; B16B16-NEXT: ptrue p0.s |
550 | 549 | ; B16B16-NEXT: bfmul z0.h, p0/m, z0.h, z1.h |
551 | 550 | ; B16B16-NEXT: ret |
552 | | - %res = fmul <vscale x 4 x bfloat> %a, %b |
| 551 | +; |
| 552 | +; NOB16B16-STREAMING-LABEL: fmul_nxv4bf16: |
| 553 | +; NOB16B16-STREAMING: // %bb.0: |
| 554 | +; NOB16B16-STREAMING-NEXT: mov z2.s, #0 // =0x0 |
| 555 | +; NOB16B16-STREAMING-NEXT: ptrue p0.s |
| 556 | +; NOB16B16-STREAMING-NEXT: bfmlalb z2.s, z0.h, z1.h |
| 557 | +; NOB16B16-STREAMING-NEXT: bfcvt z0.h, p0/m, z2.s |
| 558 | +; NOB16B16-STREAMING-NEXT: ret |
| 559 | + %res = fmul nsz <vscale x 4 x bfloat> %a, %b |
553 | 560 | ret <vscale x 4 x bfloat> %res |
554 | 561 | } |
555 | 562 |
|
556 | 563 | define <vscale x 8 x bfloat> @fmul_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { |
557 | | -; NOB16B16-LABEL: fmul_nxv8bf16: |
| 564 | +; NOB16B16-NONSTREAMING-LABEL: fmul_nxv8bf16: |
| 565 | +; NOB16B16-NONSTREAMING: // %bb.0: |
| 566 | +; NOB16B16-NONSTREAMING-NEXT: movi v2.2d, #0000000000000000 |
| 567 | +; NOB16B16-NONSTREAMING-NEXT: movi v3.2d, #0000000000000000 |
| 568 | +; NOB16B16-NONSTREAMING-NEXT: ptrue p0.s |
| 569 | +; NOB16B16-NONSTREAMING-NEXT: bfmlalb z2.s, z0.h, z1.h |
| 570 | +; NOB16B16-NONSTREAMING-NEXT: bfmlalt z3.s, z0.h, z1.h |
| 571 | +; NOB16B16-NONSTREAMING-NEXT: bfcvt z0.h, p0/m, z2.s |
| 572 | +; NOB16B16-NONSTREAMING-NEXT: bfcvtnt z0.h, p0/m, z3.s |
| 573 | +; NOB16B16-NONSTREAMING-NEXT: ret |
| 574 | +; |
| 575 | +; B16B16-LABEL: fmul_nxv8bf16: |
| 576 | +; B16B16: // %bb.0: |
| 577 | +; B16B16-NEXT: bfmul z0.h, z0.h, z1.h |
| 578 | +; B16B16-NEXT: ret |
| 579 | +; |
| 580 | +; NOB16B16-STREAMING-LABEL: fmul_nxv8bf16: |
| 581 | +; NOB16B16-STREAMING: // %bb.0: |
| 582 | +; NOB16B16-STREAMING-NEXT: mov z2.s, #0 // =0x0 |
| 583 | +; NOB16B16-STREAMING-NEXT: mov z3.s, #0 // =0x0 |
| 584 | +; NOB16B16-STREAMING-NEXT: ptrue p0.s |
| 585 | +; NOB16B16-STREAMING-NEXT: bfmlalb z2.s, z0.h, z1.h |
| 586 | +; NOB16B16-STREAMING-NEXT: bfmlalt z3.s, z0.h, z1.h |
| 587 | +; NOB16B16-STREAMING-NEXT: bfcvt z0.h, p0/m, z2.s |
| 588 | +; NOB16B16-STREAMING-NEXT: bfcvtnt z0.h, p0/m, z3.s |
| 589 | +; NOB16B16-STREAMING-NEXT: ret |
| 590 | + %res = fmul nsz <vscale x 8 x bfloat> %a, %b |
| 591 | + ret <vscale x 8 x bfloat> %res |
| 592 | +} |
| 593 | + |
| 594 | +define <vscale x 8 x bfloat> @fmul_nxv8bf16_no_nsz(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { |
| 595 | +; NOB16B16-LABEL: fmul_nxv8bf16_no_nsz: |
558 | 596 | ; NOB16B16: // %bb.0: |
559 | | -; NOB16B16-NEXT: uunpkhi z2.s, z1.h |
560 | | -; NOB16B16-NEXT: uunpkhi z3.s, z0.h |
561 | | -; NOB16B16-NEXT: uunpklo z1.s, z1.h |
562 | | -; NOB16B16-NEXT: uunpklo z0.s, z0.h |
| 597 | +; NOB16B16-NEXT: mov z2.s, #0x80000000 |
| 598 | +; NOB16B16-NEXT: mov z3.s, #0x80000000 |
563 | 599 | ; NOB16B16-NEXT: ptrue p0.s |
564 | | -; NOB16B16-NEXT: lsl z2.s, z2.s, #16 |
565 | | -; NOB16B16-NEXT: lsl z3.s, z3.s, #16 |
566 | | -; NOB16B16-NEXT: lsl z1.s, z1.s, #16 |
567 | | -; NOB16B16-NEXT: lsl z0.s, z0.s, #16 |
568 | | -; NOB16B16-NEXT: fmul z2.s, z3.s, z2.s |
569 | | -; NOB16B16-NEXT: fmul z0.s, z0.s, z1.s |
570 | | -; NOB16B16-NEXT: bfcvt z1.h, p0/m, z2.s |
571 | | -; NOB16B16-NEXT: bfcvt z0.h, p0/m, z0.s |
572 | | -; NOB16B16-NEXT: uzp1 z0.h, z0.h, z1.h |
| 600 | +; NOB16B16-NEXT: bfmlalb z2.s, z0.h, z1.h |
| 601 | +; NOB16B16-NEXT: bfmlalt z3.s, z0.h, z1.h |
| 602 | +; NOB16B16-NEXT: bfcvt z0.h, p0/m, z2.s |
| 603 | +; NOB16B16-NEXT: bfcvtnt z0.h, p0/m, z3.s |
573 | 604 | ; NOB16B16-NEXT: ret |
574 | 605 | ; |
575 | | -; B16B16-LABEL: fmul_nxv8bf16: |
| 606 | +; B16B16-LABEL: fmul_nxv8bf16_no_nsz: |
576 | 607 | ; B16B16: // %bb.0: |
577 | 608 | ; B16B16-NEXT: bfmul z0.h, z0.h, z1.h |
578 | 609 | ; B16B16-NEXT: ret |
|
0 commit comments