Skip to content

Commit c07ac57

Browse files
[LLVM][CodeGen][SME] hasSVEB16B16() is not sufficient to prove BFADD availability.
The FEAT_SVE_B16B16 arithmetic instructions are only available to streaming-mode functions when SME2 is available.
1 parent 6602d6c commit c07ac57

File tree

5 files changed

+26
-12
lines changed

5 files changed

+26
-12
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1769,7 +1769,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
17691769
setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
17701770
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
17711771

1772-
if (Subtarget->hasSVEB16B16()) {
1772+
if (Subtarget->hasSVEB16B16() &&
1773+
Subtarget->isNonStreamingSVEorSME2Available()) {
17731774
setOperationAction(ISD::FADD, VT, Legal);
17741775
setOperationAction(ISD::FMA, VT, Custom);
17751776
setOperationAction(ISD::FMAXIMUM, VT, Custom);
@@ -1791,7 +1792,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
17911792
setOperationPromotedToType(Opcode, MVT::nxv8bf16, MVT::nxv8f32);
17921793
}
17931794

1794-
if (!Subtarget->hasSVEB16B16()) {
1795+
if (!Subtarget->hasSVEB16B16() ||
1796+
!Subtarget->isNonStreamingSVEorSME2Available()) {
17951797
for (auto Opcode : {ISD::FADD, ISD::FMA, ISD::FMAXIMUM, ISD::FMAXNUM,
17961798
ISD::FMINIMUM, ISD::FMINNUM, ISD::FMUL, ISD::FSUB}) {
17971799
setOperationPromotedToType(Opcode, MVT::nxv2bf16, MVT::nxv2f32);
@@ -18123,7 +18125,8 @@ bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(
1812318125
case MVT::f64:
1812418126
return true;
1812518127
case MVT::bf16:
18126-
return VT.isScalableVector() && Subtarget->hasSVEB16B16();
18128+
return VT.isScalableVector() && Subtarget->hasSVEB16B16() &&
18129+
Subtarget->isNonStreamingSVEorSME2Available();
1812718130
default:
1812818131
break;
1812918132
}

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
143143
"fuse-aes">;
144144
def HasSVE : Predicate<"Subtarget->isSVEAvailable()">,
145145
AssemblerPredicateWithAll<(all_of FeatureSVE), "sve">;
146-
def HasSVEB16B16 : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSVEB16B16()">,
146+
def HasSVEB16B16 : Predicate<"Subtarget->hasSVEB16B16()">,
147147
AssemblerPredicateWithAll<(all_of FeatureSVEB16B16), "sve-b16b16">;
148148
def HasSVE2 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2()">,
149149
AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">;
@@ -248,6 +248,10 @@ def HasSVE_or_SME
248248
: Predicate<"Subtarget->isSVEorStreamingSVEAvailable()">,
249249
AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME),
250250
"sve or sme">;
251+
def HasNonStreamingSVE_or_SME2
252+
: Predicate<"Subtarget->isNonStreamingSVEorSME2Available()">,
253+
AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME2),
254+
"sve or sme2">;
251255
def HasNonStreamingSVE_or_SME2p1
252256
: Predicate<"Subtarget->isSVEAvailable() ||"
253257
"(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSME2p1())">,

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4408,7 +4408,7 @@ def : InstAlias<"pfalse\t$Pd", (PFALSE PPRorPNR8:$Pd), 0>;
44084408
// Non-widening BFloat16 to BFloat16 instructions
44094409
//===----------------------------------------------------------------------===//
44104410

4411-
let Predicates = [HasSVEB16B16] in {
4411+
let Predicates = [HasSVEB16B16, HasNonStreamingSVE_or_SME2] in {
44124412
defm BFADD_ZZZ : sve_fp_3op_u_zd_bfloat<0b000, "bfadd", AArch64fadd>;
44134413
defm BFSUB_ZZZ : sve_fp_3op_u_zd_bfloat<0b001, "bfsub", AArch64fsub>;
44144414
defm BFMUL_ZZZ : sve_fp_3op_u_zd_bfloat<0b010, "bfmul", AArch64fmul>;
@@ -4441,17 +4441,17 @@ defm BFMLS_ZZZI : sve_fp_fma_by_indexed_elem_bfloat<"bfmls", 0b11, AArch64fmlsid
44414441
defm BFMUL_ZZZI : sve_fp_fmul_by_indexed_elem_bfloat<"bfmul", AArch64fmulidx>;
44424442

44434443
defm BFCLAMP_ZZZ : sve_fp_clamp_bfloat<"bfclamp", AArch64fclamp>;
4444-
} // End HasSVEB16B16
4444+
} // End HasSVEB16B16, HasNonStreamingSVE_or_SME2
44454445

4446-
let Predicates = [HasSVEB16B16, UseExperimentalZeroingPseudos] in {
4446+
let Predicates = [HasSVEB16B16, HasNonStreamingSVE_or_SME2, UseExperimentalZeroingPseudos] in {
44474447
defm BFADD_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fadd>;
44484448
defm BFSUB_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fsub>;
44494449
defm BFMUL_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fmul>;
44504450
defm BFMAXNM_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fmaxnm>;
44514451
defm BFMINNM_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fminnm>;
44524452
defm BFMIN_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fmin>;
44534453
defm BFMAX_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fmax>;
4454-
} // HasSVEB16B16, UseExperimentalZeroingPseudos
4454+
} // HasSVEB16B16, HasNonStreamingSVE_or_SME2, UseExperimentalZeroingPseudos
44554455

44564456
let Predicates = [HasSVEBFSCALE] in {
44574457
def BFSCALE_ZPZZ : sve_fp_2op_p_zds_bfscale<0b1001, "bfscale", DestructiveBinary>;

llvm/lib/Target/AArch64/AArch64Subtarget.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,13 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
212212
return hasSVE() || isStreamingSVEAvailable();
213213
}
214214

215+
/// Returns true if the target has access to either the full range of SVE
216+
/// instructions, or the streaming-compatible subset of SVE instructions
217+
/// available to SME2.
218+
bool isNonStreamingSVEorSME2Available() const {
219+
return isSVEAvailable() || (isSVEorStreamingSVEAvailable() && hasSME2());
220+
}
221+
215222
unsigned getMinVectorRegisterBitWidth() const {
216223
// Don't assume any minimum vector size when PSTATE.SM may not be 0, because
217224
// we don't yet support streaming-compatible codegen support that we trust

llvm/test/CodeGen/AArch64/sve-bf16-arith.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,NOB16B16
3-
; RUN: llc -mattr=+sve,+bf16,+sve-b16b16 < %s | FileCheck %s --check-prefixes=CHECK,B16B16
4-
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,NOB16B16
5-
; RUN: llc -mattr=+sme,+sve-b16b16 -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,B16B16
2+
; RUN: llc -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,NOB16B16
3+
; RUN: llc -mattr=+sve,+bf16,+sve-b16b16 < %s | FileCheck %s --check-prefixes=CHECK,B16B16
4+
; RUN: llc -mattr=+sme,+sve-b16b16 -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,NOB16B16
5+
; RUN: llc -mattr=+sme2,+sve-b16b16 -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,B16B16
66

77
target triple = "aarch64-unknown-linux-gnu"
88

0 commit comments

Comments
 (0)