-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[LLVM][CodeGen][SME] hasB16b16() is not sufficient to prove BFADD availability. #154143
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
…ilability. The FEAT_SVE_B16B16 arithmetic instructions are only available to streaming mode functions when SME2 is available.
Member
|
@llvm/pr-subscribers-backend-aarch64 Author: Paul Walker (paulwalker-arm) ChangesThe FEAT_SVE_B16B16 arithmetic instructions are only available to streaming mode functions when SME2 is available. Full diff: https://github.com/llvm/llvm-project/pull/154143.diff 5 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2072e48914ae6..834e02a5b1d72 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1769,7 +1769,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
- if (Subtarget->hasSVEB16B16()) {
+ if (Subtarget->hasSVEB16B16() &&
+ Subtarget->isNonStreamingSVEorSME2Available()) {
setOperationAction(ISD::FADD, VT, Legal);
setOperationAction(ISD::FMA, VT, Custom);
setOperationAction(ISD::FMAXIMUM, VT, Custom);
@@ -1791,7 +1792,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationPromotedToType(Opcode, MVT::nxv8bf16, MVT::nxv8f32);
}
- if (!Subtarget->hasSVEB16B16()) {
+ if (!Subtarget->hasSVEB16B16() ||
+ !Subtarget->isNonStreamingSVEorSME2Available()) {
for (auto Opcode : {ISD::FADD, ISD::FMA, ISD::FMAXIMUM, ISD::FMAXNUM,
ISD::FMINIMUM, ISD::FMINNUM, ISD::FMUL, ISD::FSUB}) {
setOperationPromotedToType(Opcode, MVT::nxv2bf16, MVT::nxv2f32);
@@ -18123,7 +18125,8 @@ bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(
case MVT::f64:
return true;
case MVT::bf16:
- return VT.isScalableVector() && Subtarget->hasSVEB16B16();
+ return VT.isScalableVector() && Subtarget->hasSVEB16B16() &&
+ Subtarget->isNonStreamingSVEorSME2Available();
default:
break;
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 8cfbff938a395..0b4307fbdf6df 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -143,7 +143,7 @@ def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
"fuse-aes">;
def HasSVE : Predicate<"Subtarget->isSVEAvailable()">,
AssemblerPredicateWithAll<(all_of FeatureSVE), "sve">;
-def HasSVEB16B16 : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSVEB16B16()">,
+def HasSVEB16B16 : Predicate<"Subtarget->hasSVEB16B16()">,
AssemblerPredicateWithAll<(all_of FeatureSVEB16B16), "sve-b16b16">;
def HasSVE2 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2()">,
AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">;
@@ -248,6 +248,10 @@ def HasSVE_or_SME
: Predicate<"Subtarget->isSVEorStreamingSVEAvailable()">,
AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME),
"sve or sme">;
+def HasNonStreamingSVE_or_SME2
+ : Predicate<"Subtarget->isNonStreamingSVEorSME2Available()">,
+ AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME2),
+ "sve or sme2">;
def HasNonStreamingSVE_or_SME2p1
: Predicate<"Subtarget->isSVEAvailable() ||"
"(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSME2p1())">,
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 509dd8b73a017..eeb47b4d99750 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4408,7 +4408,7 @@ def : InstAlias<"pfalse\t$Pd", (PFALSE PPRorPNR8:$Pd), 0>;
// Non-widening BFloat16 to BFloat16 instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasSVEB16B16] in {
+let Predicates = [HasSVEB16B16, HasNonStreamingSVE_or_SME2] in {
defm BFADD_ZZZ : sve_fp_3op_u_zd_bfloat<0b000, "bfadd", AArch64fadd>;
defm BFSUB_ZZZ : sve_fp_3op_u_zd_bfloat<0b001, "bfsub", AArch64fsub>;
defm BFMUL_ZZZ : sve_fp_3op_u_zd_bfloat<0b010, "bfmul", AArch64fmul>;
@@ -4441,9 +4441,9 @@ defm BFMLS_ZZZI : sve_fp_fma_by_indexed_elem_bfloat<"bfmls", 0b11, AArch64fmlsid
defm BFMUL_ZZZI : sve_fp_fmul_by_indexed_elem_bfloat<"bfmul", AArch64fmulidx>;
defm BFCLAMP_ZZZ : sve_fp_clamp_bfloat<"bfclamp", AArch64fclamp>;
-} // End HasSVEB16B16
+} // End HasSVEB16B16, HasNonStreamingSVE_or_SME2
-let Predicates = [HasSVEB16B16, UseExperimentalZeroingPseudos] in {
+let Predicates = [HasSVEB16B16, HasNonStreamingSVE_or_SME2, UseExperimentalZeroingPseudos] in {
defm BFADD_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fadd>;
defm BFSUB_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fsub>;
defm BFMUL_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fmul>;
@@ -4451,7 +4451,7 @@ defm BFMAXNM_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fmaxnm>;
defm BFMINNM_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fminnm>;
defm BFMIN_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fmin>;
defm BFMAX_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fmax>;
-} // HasSVEB16B16, UseExperimentalZeroingPseudos
+} // HasSVEB16B16, HasNonStreamingSVE_or_SME2, UseExperimentalZeroingPseudos
let Predicates = [HasSVEBFSCALE] in {
def BFSCALE_ZPZZ : sve_fp_2op_p_zds_bfscale<0b1001, "bfscale", DestructiveBinary>;
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index d00e4471e107d..01c0bcc3a6a78 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -212,6 +212,13 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
return hasSVE() || isStreamingSVEAvailable();
}
+ /// Returns true if the target has access to either the full range of SVE
+ /// instructions, or the streaming-compatible subset of SVE instructions
+ /// available to SME2.
+ bool isNonStreamingSVEorSME2Available() const {
+ return isSVEAvailable() || (isSVEorStreamingSVEAvailable() && hasSME2());
+ }
+
unsigned getMinVectorRegisterBitWidth() const {
// Don't assume any minimum vector size when PSTATE.SM may not be 0, because
// we don't yet support streaming-compatible codegen support that we trust
diff --git a/llvm/test/CodeGen/AArch64/sve-bf16-arith.ll b/llvm/test/CodeGen/AArch64/sve-bf16-arith.ll
index 83f4f8fc57aae..0580f5e0b019a 100644
--- a/llvm/test/CodeGen/AArch64/sve-bf16-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-bf16-arith.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,NOB16B16
-; RUN: llc -mattr=+sve,+bf16,+sve-b16b16 < %s | FileCheck %s --check-prefixes=CHECK,B16B16
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,NOB16B16
-; RUN: llc -mattr=+sme,+sve-b16b16 -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,B16B16
+; RUN: llc -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,NOB16B16
+; RUN: llc -mattr=+sve,+bf16,+sve-b16b16 < %s | FileCheck %s --check-prefixes=CHECK,B16B16
+; RUN: llc -mattr=+sme,+sve-b16b16 -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,NOB16B16
+; RUN: llc -mattr=+sme2,+sve-b16b16 -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,B16B16
target triple = "aarch64-unknown-linux-gnu"
|
…bling all of SVE.
huntergr-arm
approved these changes
Aug 19, 2025
Collaborator
huntergr-arm
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
The FEAT_SVE_B16B16 arithmetic instructions are only available to streaming mode functions when SME2 is available.
https://developer.arm.com/documentation/ddi0602/2025-06/SVE-Instructions/BFADD--predicated---BFloat16-add--predicated--?lang=en