@@ -534,7 +534,7 @@ define <vscale x 2 x bfloat> @fmul_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x
534534; NOB16B16-STREAMING-NEXT: bfmlalb z2.s, z0.h, z1.h
535535; NOB16B16-STREAMING-NEXT: bfcvt z0.h, p0/m, z2.s
536536; NOB16B16-STREAMING-NEXT: ret
537- %res = fmul <vscale x 2 x bfloat> %a , %b
537+ %res = fmul nsz <vscale x 2 x bfloat> %a , %b
538538 ret <vscale x 2 x bfloat> %res
539539}
540540
@@ -559,7 +559,7 @@ define <vscale x 4 x bfloat> @fmul_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x
559559; NOB16B16-STREAMING-NEXT: bfmlalb z2.s, z0.h, z1.h
560560; NOB16B16-STREAMING-NEXT: bfcvt z0.h, p0/m, z2.s
561561; NOB16B16-STREAMING-NEXT: ret
562- %res = fmul <vscale x 4 x bfloat> %a , %b
562+ %res = fmul nsz <vscale x 4 x bfloat> %a , %b
563563 ret <vscale x 4 x bfloat> %res
564564}
565565
@@ -590,6 +590,27 @@ define <vscale x 8 x bfloat> @fmul_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x
590590; NOB16B16-STREAMING-NEXT: bfcvt z0.h, p0/m, z2.s
591591; NOB16B16-STREAMING-NEXT: bfcvtnt z0.h, p0/m, z3.s
592592; NOB16B16-STREAMING-NEXT: ret
593+ %res = fmul nsz <vscale x 8 x bfloat> %a , %b
594+ ret <vscale x 8 x bfloat> %res
595+ }
596+
; Scalable-vector bf16 multiply written as a plain `fmul`, i.e. without the
; `nsz` fast-math flag on the instruction.
; Expected code under the NOB16B16 prefix: the constant 0x80000000 is splatted
; into z2.s and copied to z3, both registers are then used as the accumulators
; of bfmlalb/bfmlalt on the two inputs, and the results are narrowed back into
; z0.h with bfcvt/bfcvtnt.
; Expected code under the B16B16 prefix is a single bfmul.
; NOTE(review): 0x80000000 is the bit pattern of -0.0 as an IEEE-754 single;
; presumably the accumulator is seeded with -0.0 so that the sign of a zero
; product is preserved when nsz is absent - confirm against the lowering.
597+ define <vscale x 8 x bfloat> @fmul_nxv8bf16_no_nsz (<vscale x 8 x bfloat> %a , <vscale x 8 x bfloat> %b ) {
598+ ; NOB16B16-LABEL: fmul_nxv8bf16_no_nsz:
599+ ; NOB16B16: // %bb.0:
600+ ; NOB16B16-NEXT: mov w8, #-2147483648 // =0x80000000
601+ ; NOB16B16-NEXT: ptrue p0.s
602+ ; NOB16B16-NEXT: mov z2.s, w8
603+ ; NOB16B16-NEXT: mov z3.d, z2.d
604+ ; NOB16B16-NEXT: bfmlalb z2.s, z0.h, z1.h
605+ ; NOB16B16-NEXT: bfmlalt z3.s, z0.h, z1.h
606+ ; NOB16B16-NEXT: bfcvt z0.h, p0/m, z2.s
607+ ; NOB16B16-NEXT: bfcvtnt z0.h, p0/m, z3.s
608+ ; NOB16B16-NEXT: ret
609+ ;
610+ ; B16B16-LABEL: fmul_nxv8bf16_no_nsz:
611+ ; B16B16: // %bb.0:
612+ ; B16B16-NEXT: bfmul z0.h, z0.h, z1.h
613+ ; B16B16-NEXT: ret
593614 %res = fmul <vscale x 8 x bfloat> %a , %b
594615 ret <vscale x 8 x bfloat> %res
595616}
0 commit comments