From c5b290592a08ad0143431c18606d3371fbbd87a6 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Mon, 17 Nov 2025 14:53:33 +0000 Subject: [PATCH] [LLVM][CodeGen][SVE] Only use unpredicated bfloat instructions when all lanes are in use. While SVE support for exception safe floating point code generation is bare bones we try to ensure inactive lanes remain inert. I mistakenly broke this rule when adding support for SVE-B16B16 by lowering some bfloat operations of unpacked vectors to unpredicated instructions. --- .../Target/AArch64/AArch64ISelLowering.cpp | 6 ++-- llvm/lib/Target/AArch64/SVEInstrFormats.td | 2 -- llvm/test/CodeGen/AArch64/sve-bf16-arith.ll | 18 ++++++---- .../test/CodeGen/AArch64/sve-bf16-combines.ll | 36 ++++++------------- 4 files changed, 25 insertions(+), 37 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 35836af3c874b..c6fed015f08fa 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1774,14 +1774,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, if (Subtarget->hasSVEB16B16() && Subtarget->isNonStreamingSVEorSME2Available()) { - setOperationAction(ISD::FADD, VT, Legal); + setOperationAction(ISD::FADD, VT, Custom); setOperationAction(ISD::FMA, VT, Custom); setOperationAction(ISD::FMAXIMUM, VT, Custom); setOperationAction(ISD::FMAXNUM, VT, Custom); setOperationAction(ISD::FMINIMUM, VT, Custom); setOperationAction(ISD::FMINNUM, VT, Custom); - setOperationAction(ISD::FMUL, VT, Legal); - setOperationAction(ISD::FSUB, VT, Legal); + setOperationAction(ISD::FMUL, VT, Custom); + setOperationAction(ISD::FSUB, VT, Custom); } } diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 1664f4ad0c8fa..ebaea477d698f 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -2439,8 +2439,6 @@ 
multiclass sve_fp_3op_u_zd_bfloat opc, string asm, SDPatternOperator op> def NAME : sve_fp_3op_u_zd<0b00, opc, asm, ZPR16>; def : SVE_2_Op_Pat(NAME)>; - def : SVE_2_Op_Pat(NAME)>; - def : SVE_2_Op_Pat(NAME)>; } multiclass sve_fp_3op_u_zd_ftsmul opc, string asm, SDPatternOperator op> { diff --git a/llvm/test/CodeGen/AArch64/sve-bf16-arith.ll b/llvm/test/CodeGen/AArch64/sve-bf16-arith.ll index 582e8456c05b3..2103bc30b8381 100644 --- a/llvm/test/CodeGen/AArch64/sve-bf16-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-bf16-arith.ll @@ -56,7 +56,8 @@ define @fadd_nxv2bf16( %a, %a, %b ret %res @@ -74,7 +75,8 @@ define @fadd_nxv4bf16( %a, %a, %b ret %res @@ -525,7 +527,8 @@ define @fmul_nxv2bf16( %a, %a, %b ret %res @@ -543,7 +546,8 @@ define @fmul_nxv4bf16( %a, %a, %b ret %res @@ -672,7 +676,8 @@ define @fsub_nxv2bf16( %a, %a, %b ret %res @@ -690,7 +695,8 @@ define @fsub_nxv4bf16( %a, %a, %b ret %res diff --git a/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll b/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll index 16e8feb0dc5bb..86d4f61316446 100644 --- a/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll +++ b/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll @@ -311,8 +311,7 @@ define @fadd_sel_nxv8bf16( %a, %mask, %b, zeroinitializer %fadd = fadd nsz %a, %sel @@ -341,8 +340,7 @@ define @fsub_sel_nxv8bf16( %a, %mask, %b, zeroinitializer %fsub = fsub %a, %sel @@ -371,8 +369,7 @@ define @fadd_sel_negzero_nxv8bf16( %a ; ; SVE-B16B16-LABEL: fadd_sel_negzero_nxv8bf16: ; SVE-B16B16: // %bb.0: -; SVE-B16B16-NEXT: bfadd z1.h, z0.h, z1.h -; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h +; SVE-B16B16-NEXT: bfadd z0.h, p0/m, z0.h, z1.h ; SVE-B16B16-NEXT: ret %nz = fneg zeroinitializer %sel = select %mask, %b, %nz @@ -402,8 +399,7 @@ define @fsub_sel_negzero_nxv8bf16( %a ; ; SVE-B16B16-LABEL: fsub_sel_negzero_nxv8bf16: ; SVE-B16B16: // %bb.0: -; SVE-B16B16-NEXT: bfsub z1.h, z0.h, z1.h -; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h +; SVE-B16B16-NEXT: bfsub z0.h, p0/m, z0.h, z1.h ; SVE-B16B16-NEXT: ret %nz 
= fneg zeroinitializer %sel = select %mask, %b, %nz @@ -490,9 +486,7 @@ define @fsub_sel_fmul_nxv8bf16( %a, < ; ; SVE-B16B16-LABEL: fsub_sel_fmul_nxv8bf16: ; SVE-B16B16: // %bb.0: -; SVE-B16B16-NEXT: bfmul z1.h, z1.h, z2.h -; SVE-B16B16-NEXT: bfsub z1.h, z0.h, z1.h -; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h +; SVE-B16B16-NEXT: bfmls z0.h, p0/m, z1.h, z2.h ; SVE-B16B16-NEXT: ret %fmul = fmul %b, %c %sel = select %mask, %fmul, zeroinitializer @@ -532,9 +526,7 @@ define @fadd_sel_fmul_nsz_nxv8bf16( % ; ; SVE-B16B16-LABEL: fadd_sel_fmul_nsz_nxv8bf16: ; SVE-B16B16: // %bb.0: -; SVE-B16B16-NEXT: bfmul z1.h, z1.h, z2.h -; SVE-B16B16-NEXT: bfadd z1.h, z0.h, z1.h -; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h +; SVE-B16B16-NEXT: bfmla z0.h, p0/m, z1.h, z2.h ; SVE-B16B16-NEXT: ret %fmul = fmul %b, %c %sel = select %mask, %fmul, zeroinitializer @@ -574,9 +566,7 @@ define @fsub_sel_fmul_nsz_nxv8bf16( % ; ; SVE-B16B16-LABEL: fsub_sel_fmul_nsz_nxv8bf16: ; SVE-B16B16: // %bb.0: -; SVE-B16B16-NEXT: bfmul z1.h, z1.h, z2.h -; SVE-B16B16-NEXT: bfsub z1.h, z0.h, z1.h -; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h +; SVE-B16B16-NEXT: bfmls z0.h, p0/m, z1.h, z2.h ; SVE-B16B16-NEXT: ret %fmul = fmul %b, %c %sel = select %mask, %fmul, zeroinitializer @@ -616,9 +606,7 @@ define @fadd_sel_fmul_negzero_nxv8bf16( %b, %c %nz = fneg zeroinitializer @@ -711,9 +699,7 @@ define @fadd_sel_fmul_negzero_nsz_nxv8bf16( %b, %c %nz = fneg zeroinitializer @@ -754,9 +740,7 @@ define @fsub_sel_fmul_negzero_nsz_nxv8bf16( %b, %c %nz = fneg zeroinitializer