Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18570,7 +18570,7 @@ bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(
   case MVT::f64:
     return true;
   case MVT::bf16:
-    return VT.isScalableVector() && Subtarget->hasSVEB16B16() &&
+    return VT.isScalableVector() && Subtarget->hasBF16() &&
            Subtarget->isNonStreamingSVEorSME2Available();
   default:
     break;
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -2578,6 +2578,10 @@ let Predicates = [HasBF16, HasSVE_or_SME] in {
   defm BFMLALB_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b100, "bfmlalb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalb_lane_v2>;
   defm BFMLALT_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b101, "bfmlalt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalt_lane_v2>;

+  def : Pat<(nxv4f32 (AArch64fmla_p (SVEAllActive), nxv4f32:$acc, (nxv4f32 (AArch64fcvte_mt (SVEAllActive), nxv4bf16:$Zn, (undef))),
+                                    (nxv4f32 (AArch64fcvte_mt (SVEAllActive), nxv4bf16:$Zm, (undef))))),
+            (BFMLALB_ZZZ nxv4f32:$acc, ZPR:$Zn, ZPR:$Zm)>;
+
   defm BFCVT_ZPmZ : sve_bfloat_convert<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2, AArch64fcvtr_mt>;
   defm BFCVTNT_ZPmZ : sve_bfloat_convert_top<"bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32_v2>;
 } // End HasBF16, HasSVE_or_SME
Expand Down
28 changes: 11 additions & 17 deletions llvm/test/CodeGen/AArch64/sve-bf16-arith.ll
Original file line number Diff line number Diff line change
Expand Up @@ -466,12 +466,10 @@ define <vscale x 2 x bfloat> @fmla_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x
 define <vscale x 4 x bfloat> @fmla_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c) {
 ; NOB16B16-LABEL: fmla_nxv4bf16:
 ; NOB16B16: // %bb.0:
-; NOB16B16-NEXT: lsl z1.s, z1.s, #16
-; NOB16B16-NEXT: lsl z0.s, z0.s, #16
 ; NOB16B16-NEXT: lsl z2.s, z2.s, #16
 ; NOB16B16-NEXT: ptrue p0.s
-; NOB16B16-NEXT: fmad z0.s, p0/m, z1.s, z2.s
-; NOB16B16-NEXT: bfcvt z0.h, p0/m, z0.s
+; NOB16B16-NEXT: bfmlalb z2.s, z0.h, z1.h
+; NOB16B16-NEXT: bfcvt z0.h, p0/m, z2.s
 ; NOB16B16-NEXT: ret
 ;
 ; B16B16-LABEL: fmla_nxv4bf16:
Expand All @@ -486,24 +484,20 @@ define <vscale x 4 x bfloat> @fmla_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x
 define <vscale x 8 x bfloat> @fmla_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) {
 ; NOB16B16-LABEL: fmla_nxv8bf16:
 ; NOB16B16: // %bb.0:
-; NOB16B16-NEXT: uunpkhi z3.s, z1.h
-; NOB16B16-NEXT: uunpkhi z4.s, z0.h
-; NOB16B16-NEXT: uunpkhi z5.s, z2.h
+; NOB16B16-NEXT: uunpkhi z3.s, z2.h
+; NOB16B16-NEXT: uunpklo z2.s, z2.h
+; NOB16B16-NEXT: uunpkhi z4.s, z1.h
+; NOB16B16-NEXT: uunpkhi z5.s, z0.h
 ; NOB16B16-NEXT: uunpklo z1.s, z1.h
 ; NOB16B16-NEXT: uunpklo z0.s, z0.h
-; NOB16B16-NEXT: uunpklo z2.s, z2.h
 ; NOB16B16-NEXT: ptrue p0.s
 ; NOB16B16-NEXT: lsl z3.s, z3.s, #16
-; NOB16B16-NEXT: lsl z4.s, z4.s, #16
-; NOB16B16-NEXT: lsl z5.s, z5.s, #16
-; NOB16B16-NEXT: lsl z1.s, z1.s, #16
-; NOB16B16-NEXT: lsl z0.s, z0.s, #16
 ; NOB16B16-NEXT: lsl z2.s, z2.s, #16
-; NOB16B16-NEXT: fmad z3.s, p0/m, z4.s, z5.s
-; NOB16B16-NEXT: fmad z0.s, p0/m, z1.s, z2.s
-; NOB16B16-NEXT: bfcvt z1.h, p0/m, z3.s
-; NOB16B16-NEXT: bfcvt z0.h, p0/m, z0.s
-; NOB16B16-NEXT: uzp1 z0.h, z0.h, z1.h
+; NOB16B16-NEXT: bfmlalb z3.s, z5.h, z4.h
+; NOB16B16-NEXT: bfmlalb z2.s, z0.h, z1.h
+; NOB16B16-NEXT: bfcvt z0.h, p0/m, z3.s
+; NOB16B16-NEXT: bfcvt z1.h, p0/m, z2.s
+; NOB16B16-NEXT: uzp1 z0.h, z1.h, z0.h
 ; NOB16B16-NEXT: ret
 ;
 ; B16B16-LABEL: fmla_nxv8bf16:
Expand Down
Loading