|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
2 | | -; RUN: llc -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,NOB16B16 |
| 2 | +; RUN: llc -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,NOB16B16,NOB16B16-NONSTREAMING |
3 | 3 | ; RUN: llc -mattr=+sve,+bf16,+sve-b16b16 < %s | FileCheck %s --check-prefixes=CHECK,B16B16 |
4 | | -; RUN: llc -mattr=+sme,+sve-b16b16 -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,NOB16B16 |
| 4 | +; RUN: llc -mattr=+sme,+sve-b16b16 -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,NOB16B16,NOB16B16-STREAMING |
5 | 5 | ; RUN: llc -mattr=+sme2,+sve-b16b16 -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,B16B16 |
6 | 6 |
|
7 | 7 | target triple = "aarch64-unknown-linux-gnu" |
@@ -514,64 +514,82 @@ define <vscale x 8 x bfloat> @fmla_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x |
514 | 514 | ; |
515 | 515 |
|
516 | 516 | define <vscale x 2 x bfloat> @fmul_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { |
517 | | -; NOB16B16-LABEL: fmul_nxv2bf16: |
518 | | -; NOB16B16: // %bb.0: |
519 | | -; NOB16B16-NEXT: lsl z1.s, z1.s, #16 |
520 | | -; NOB16B16-NEXT: lsl z0.s, z0.s, #16 |
521 | | -; NOB16B16-NEXT: ptrue p0.d |
522 | | -; NOB16B16-NEXT: fmul z0.s, p0/m, z0.s, z1.s |
523 | | -; NOB16B16-NEXT: bfcvt z0.h, p0/m, z0.s |
524 | | -; NOB16B16-NEXT: ret |
| 517 | +; NOB16B16-NONSTREAMING-LABEL: fmul_nxv2bf16: |
| 518 | +; NOB16B16-NONSTREAMING: // %bb.0: |
| 519 | +; NOB16B16-NONSTREAMING-NEXT: movi v2.2d, #0000000000000000 |
| 520 | +; NOB16B16-NONSTREAMING-NEXT: ptrue p0.d |
| 521 | +; NOB16B16-NONSTREAMING-NEXT: bfmlalb z2.s, z0.h, z1.h |
| 522 | +; NOB16B16-NONSTREAMING-NEXT: bfcvt z0.h, p0/m, z2.s |
| 523 | +; NOB16B16-NONSTREAMING-NEXT: ret |
525 | 524 | ; |
526 | 525 | ; B16B16-LABEL: fmul_nxv2bf16: |
527 | 526 | ; B16B16: // %bb.0: |
528 | 527 | ; B16B16-NEXT: bfmul z0.h, z0.h, z1.h |
529 | 528 | ; B16B16-NEXT: ret |
| 529 | +; |
| 530 | +; NOB16B16-STREAMING-LABEL: fmul_nxv2bf16: |
| 531 | +; NOB16B16-STREAMING: // %bb.0: |
| 532 | +; NOB16B16-STREAMING-NEXT: mov z2.s, #0 // =0x0 |
| 533 | +; NOB16B16-STREAMING-NEXT: ptrue p0.d |
| 534 | +; NOB16B16-STREAMING-NEXT: bfmlalb z2.s, z0.h, z1.h |
| 535 | +; NOB16B16-STREAMING-NEXT: bfcvt z0.h, p0/m, z2.s |
| 536 | +; NOB16B16-STREAMING-NEXT: ret |
530 | 537 | %res = fmul <vscale x 2 x bfloat> %a, %b |
531 | 538 | ret <vscale x 2 x bfloat> %res |
532 | 539 | } |
533 | 540 |
|
534 | 541 | define <vscale x 4 x bfloat> @fmul_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { |
535 | | -; NOB16B16-LABEL: fmul_nxv4bf16: |
536 | | -; NOB16B16: // %bb.0: |
537 | | -; NOB16B16-NEXT: lsl z1.s, z1.s, #16 |
538 | | -; NOB16B16-NEXT: lsl z0.s, z0.s, #16 |
539 | | -; NOB16B16-NEXT: ptrue p0.s |
540 | | -; NOB16B16-NEXT: fmul z0.s, z0.s, z1.s |
541 | | -; NOB16B16-NEXT: bfcvt z0.h, p0/m, z0.s |
542 | | -; NOB16B16-NEXT: ret |
| 542 | +; NOB16B16-NONSTREAMING-LABEL: fmul_nxv4bf16: |
| 543 | +; NOB16B16-NONSTREAMING: // %bb.0: |
| 544 | +; NOB16B16-NONSTREAMING-NEXT: movi v2.2d, #0000000000000000 |
| 545 | +; NOB16B16-NONSTREAMING-NEXT: ptrue p0.s |
| 546 | +; NOB16B16-NONSTREAMING-NEXT: bfmlalb z2.s, z0.h, z1.h |
| 547 | +; NOB16B16-NONSTREAMING-NEXT: bfcvt z0.h, p0/m, z2.s |
| 548 | +; NOB16B16-NONSTREAMING-NEXT: ret |
543 | 549 | ; |
544 | 550 | ; B16B16-LABEL: fmul_nxv4bf16: |
545 | 551 | ; B16B16: // %bb.0: |
546 | 552 | ; B16B16-NEXT: bfmul z0.h, z0.h, z1.h |
547 | 553 | ; B16B16-NEXT: ret |
| 554 | +; |
| 555 | +; NOB16B16-STREAMING-LABEL: fmul_nxv4bf16: |
| 556 | +; NOB16B16-STREAMING: // %bb.0: |
| 557 | +; NOB16B16-STREAMING-NEXT: mov z2.s, #0 // =0x0 |
| 558 | +; NOB16B16-STREAMING-NEXT: ptrue p0.s |
| 559 | +; NOB16B16-STREAMING-NEXT: bfmlalb z2.s, z0.h, z1.h |
| 560 | +; NOB16B16-STREAMING-NEXT: bfcvt z0.h, p0/m, z2.s |
| 561 | +; NOB16B16-STREAMING-NEXT: ret |
548 | 562 | %res = fmul <vscale x 4 x bfloat> %a, %b |
549 | 563 | ret <vscale x 4 x bfloat> %res |
550 | 564 | } |
551 | 565 |
|
552 | 566 | define <vscale x 8 x bfloat> @fmul_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { |
553 | | -; NOB16B16-LABEL: fmul_nxv8bf16: |
554 | | -; NOB16B16: // %bb.0: |
555 | | -; NOB16B16-NEXT: uunpkhi z2.s, z1.h |
556 | | -; NOB16B16-NEXT: uunpkhi z3.s, z0.h |
557 | | -; NOB16B16-NEXT: uunpklo z1.s, z1.h |
558 | | -; NOB16B16-NEXT: uunpklo z0.s, z0.h |
559 | | -; NOB16B16-NEXT: ptrue p0.s |
560 | | -; NOB16B16-NEXT: lsl z2.s, z2.s, #16 |
561 | | -; NOB16B16-NEXT: lsl z3.s, z3.s, #16 |
562 | | -; NOB16B16-NEXT: lsl z1.s, z1.s, #16 |
563 | | -; NOB16B16-NEXT: lsl z0.s, z0.s, #16 |
564 | | -; NOB16B16-NEXT: fmul z2.s, z3.s, z2.s |
565 | | -; NOB16B16-NEXT: fmul z0.s, z0.s, z1.s |
566 | | -; NOB16B16-NEXT: bfcvt z1.h, p0/m, z2.s |
567 | | -; NOB16B16-NEXT: bfcvt z0.h, p0/m, z0.s |
568 | | -; NOB16B16-NEXT: uzp1 z0.h, z0.h, z1.h |
569 | | -; NOB16B16-NEXT: ret |
| 567 | +; NOB16B16-NONSTREAMING-LABEL: fmul_nxv8bf16: |
| 568 | +; NOB16B16-NONSTREAMING: // %bb.0: |
| 569 | +; NOB16B16-NONSTREAMING-NEXT: movi v2.2d, #0000000000000000 |
| 570 | +; NOB16B16-NONSTREAMING-NEXT: movi v3.2d, #0000000000000000 |
| 571 | +; NOB16B16-NONSTREAMING-NEXT: ptrue p0.s |
| 572 | +; NOB16B16-NONSTREAMING-NEXT: bfmlalb z2.s, z0.h, z1.h |
| 573 | +; NOB16B16-NONSTREAMING-NEXT: bfmlalt z3.s, z0.h, z1.h |
| 574 | +; NOB16B16-NONSTREAMING-NEXT: bfcvt z0.h, p0/m, z2.s |
| 575 | +; NOB16B16-NONSTREAMING-NEXT: bfcvtnt z0.h, p0/m, z3.s |
| 576 | +; NOB16B16-NONSTREAMING-NEXT: ret |
570 | 577 | ; |
571 | 578 | ; B16B16-LABEL: fmul_nxv8bf16: |
572 | 579 | ; B16B16: // %bb.0: |
573 | 580 | ; B16B16-NEXT: bfmul z0.h, z0.h, z1.h |
574 | 581 | ; B16B16-NEXT: ret |
| 582 | +; |
| 583 | +; NOB16B16-STREAMING-LABEL: fmul_nxv8bf16: |
| 584 | +; NOB16B16-STREAMING: // %bb.0: |
| 585 | +; NOB16B16-STREAMING-NEXT: mov z2.s, #0 // =0x0 |
| 586 | +; NOB16B16-STREAMING-NEXT: mov z3.s, #0 // =0x0 |
| 587 | +; NOB16B16-STREAMING-NEXT: ptrue p0.s |
| 588 | +; NOB16B16-STREAMING-NEXT: bfmlalb z2.s, z0.h, z1.h |
| 589 | +; NOB16B16-STREAMING-NEXT: bfmlalt z3.s, z0.h, z1.h |
| 590 | +; NOB16B16-STREAMING-NEXT: bfcvt z0.h, p0/m, z2.s |
| 591 | +; NOB16B16-STREAMING-NEXT: bfcvtnt z0.h, p0/m, z3.s |
| 592 | +; NOB16B16-STREAMING-NEXT: ret |
575 | 593 | %res = fmul <vscale x 8 x bfloat> %a, %b |
576 | 594 | ret <vscale x 8 x bfloat> %res |
577 | 595 | } |
|
0 commit comments