Skip to content

Commit 76fece9

Browse files
openeuler-ci-bot authored and gitee-org committed
!115 [Backport][AArch64][SME] Add support for sme-fa64
From: @xiajingze Reviewed-by: @cf-zhao Signed-off-by: @cf-zhao
2 parents 52b83eb + 14b255b commit 76fece9

File tree

12 files changed

+91
-8
lines changed

12 files changed

+91
-8
lines changed

clang/lib/Basic/Targets/AArch64.cpp

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -666,6 +666,7 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
666666
.Case("sme", HasSME)
667667
.Case("sme-f64f64", HasSMEF64F64)
668668
.Case("sme-i16i64", HasSMEI16I64)
669+
.Case("sme-fa64", HasSMEFA64)
669670
.Cases("memtag", "memtag2", HasMTE)
670671
.Case("sb", HasSB)
671672
.Case("predres", HasPredRes)
@@ -795,6 +796,13 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
795796
HasBFloat16 = true;
796797
HasFullFP16 = true;
797798
}
799+
if (Feature == "+sme-fa64") {
800+
FPU |= NeonMode;
801+
FPU |= SveMode;
802+
HasSME = true;
803+
HasSVE2 = true;
804+
HasSMEFA64 = true;
805+
}
798806
if (Feature == "+sb")
799807
HasSB = true;
800808
if (Feature == "+predres")

clang/lib/Basic/Targets/AArch64.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
8383
bool HasFMV = true;
8484
bool HasGCS = false;
8585
bool HasRCPC3 = false;
86+
bool HasSMEFA64 = false;
8687

8788
const llvm::AArch64::ArchInfo *ArchInfo = &llvm::AArch64::ARMV8A;
8889

llvm/include/llvm/TargetParser/AArch64TargetParser.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -154,6 +154,7 @@ enum ArchExtKind : uint64_t {
154154
AEK_RASv2 = 1ULL << 54, // FEAT_RASv2
155155
AEK_ITE = 1ULL << 55, // FEAT_ITE
156156
AEK_GCS = 1ULL << 56, // FEAT_GCS
157+
AEK_SMEFA64 = 1ULL << 57, // FEAT_SME_FA64
157158
};
158159
// clang-format on
159160

@@ -260,6 +261,7 @@ inline constexpr ExtensionInfo Extensions[] = {
260261
{"tme", AArch64::AEK_TME, "+tme", "-tme", FEAT_MAX, "", 0},
261262
{"wfxt", AArch64::AEK_NONE, {}, {}, FEAT_WFXT, "+wfxt", 550},
262263
{"gcs", AArch64::AEK_GCS, "+gcs", "-gcs", FEAT_MAX, "", 0},
264+
{"sme-fa64", AArch64::AEK_SMEFA64, "+sme-fa64", "-sme-fa64", FEAT_MAX, "", 0},
263265
// Special cases
264266
{"none", AArch64::AEK_NONE, {}, {}, FEAT_MAX, "", ExtensionInfo::MaxFMVPriority},
265267
};

llvm/lib/Target/AArch64/AArch64.td

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,9 @@ def FeatureSMEI16I64 : SubtargetFeature<"sme-i16i64", "HasSMEI16I64", "true",
503503
def FeatureSMEF16F16 : SubtargetFeature<"sme-f16f16", "HasSMEF16F16", "true",
504504
"Enable SME2.1 non-widening Float16 instructions (FEAT_SME_F16F16)", []>;
505505

506+
def FeatureSMEFA64 : SubtargetFeature<"sme-fa64", "HasSMEFA64", "true",
507+
"Enable the full A64 instruction set in streaming SVE mode (FEAT_SME_FA64)", [FeatureSME, FeatureSVE2]>;
508+
506509
def FeatureSME2 : SubtargetFeature<"sme2", "HasSME2", "true",
507510
"Enable Scalable Matrix Extension 2 (SME2) instructions", [FeatureSME]>;
508511

@@ -746,7 +749,7 @@ def SME2Unsupported : AArch64Unsupported {
746749
}
747750

748751
def SMEUnsupported : AArch64Unsupported {
749-
let F = !listconcat([HasSME, HasSMEI16I64, HasSMEF16F16, HasSMEF64F64],
752+
let F = !listconcat([HasSME, HasSMEI16I64, HasSMEF16F16, HasSMEF64F64, HasSMEFA64],
750753
SME2Unsupported.F);
751754
}
752755

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -154,6 +154,8 @@ def HasSMEF64F64 : Predicate<"Subtarget->hasSMEF64F64()">,
154154
AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">;
155155
def HasSMEF16F16 : Predicate<"Subtarget->hasSMEF16F16()">,
156156
AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">;
157+
def HasSMEFA64 : Predicate<"Subtarget->hasSMEFA64()">,
158+
AssemblerPredicateWithAll<(all_of FeatureSMEFA64), "sme-fa64">;
157159
def HasSMEI16I64 : Predicate<"Subtarget->hasSMEI16I64()">,
158160
AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">;
159161
def HasSME2 : Predicate<"Subtarget->hasSME2()">,

llvm/lib/Target/AArch64/AArch64SchedA64FX.td

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ def A64FXModel : SchedMachineModel {
2323
list<Predicate> UnsupportedFeatures =
2424
[HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth,
2525
HasSVE2orSME, HasMTE, HasMatMulInt8, HasBF16, HasSME2, HasSME2p1, HasSVE2p1,
26-
HasSVE2p1_or_HasSME2p1, HasSMEF16F16];
26+
HasSVE2p1_or_HasSME2p1, HasSMEF16F16, HasSMEFA64];
2727

2828
let FullInstRWOverlapCheck = 0;
2929
}

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -489,11 +489,11 @@ bool AArch64Subtarget::isStreamingCompatible() const {
489489
}
490490

491491
bool AArch64Subtarget::isNeonAvailable() const {
492-
return hasNEON() && !isStreaming() && !isStreamingCompatible();
492+
return hasNEON() &&
493+
(hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
493494
}
494495

495-
bool AArch64Subtarget::isSVEAvailable() const{
496-
// FIXME: Also return false if FEAT_FA64 is set, but we can't do this yet
497-
// as we don't yet support the feature in LLVM.
498-
return hasSVE() && !isStreaming() && !isStreamingCompatible();
496+
bool AArch64Subtarget::isSVEAvailable() const {
497+
return hasSVE() &&
498+
(hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
499499
}

llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3625,6 +3625,7 @@ static const struct Extension {
36253625
{"sb", {AArch64::FeatureSB}},
36263626
{"ssbs", {AArch64::FeatureSSBS}},
36273627
{"tme", {AArch64::FeatureTME}},
3628+
{"sme-fa64", {AArch64::FeatureSMEFA64}},
36283629
};
36293630

36303631
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,33 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mattr=+sme-fa64 -force-streaming-compatible-sve < %s | FileCheck %s -check-prefix=FA64
3+
; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s -check-prefix=NO-FA64
4+
5+
6+
target triple = "aarch64-unknown-linux-gnu"
7+
8+
define half @fadda_v4f16(half %start, <4 x half> %a) {
9+
; FA64-LABEL: fadda_v4f16:
10+
; FA64: // %bb.0:
11+
; FA64-NEXT: ptrue p0.h, vl4
12+
; FA64-NEXT: // kill: def $h0 killed $h0 def $z0
13+
; FA64-NEXT: // kill: def $d1 killed $d1 def $z1
14+
; FA64-NEXT: fadda h0, p0, h0, z1.h
15+
; FA64-NEXT: // kill: def $h0 killed $h0 killed $z0
16+
; FA64-NEXT: ret
17+
;
18+
; NO-FA64-LABEL: fadda_v4f16:
19+
; NO-FA64: // %bb.0:
20+
; NO-FA64-NEXT: // kill: def $d1 killed $d1 def $z1
21+
; NO-FA64-NEXT: fadd h0, h0, h1
22+
; NO-FA64-NEXT: mov z2.h, z1.h[1]
23+
; NO-FA64-NEXT: fadd h0, h0, h2
24+
; NO-FA64-NEXT: mov z2.h, z1.h[2]
25+
; NO-FA64-NEXT: mov z1.h, z1.h[3]
26+
; NO-FA64-NEXT: fadd h0, h0, h2
27+
; NO-FA64-NEXT: fadd h0, h0, h1
28+
; NO-FA64-NEXT: ret
29+
%res = call half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)
30+
ret half %res
31+
}
32+
33+
declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>)
Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,26 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mattr=+sme-fa64 -force-streaming-compatible-sve < %s | FileCheck %s -check-prefix=FA64
3+
; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s -check-prefix=NO-FA64
4+
5+
target triple = "aarch64-unknown-linux-gnu"
6+
7+
define <8 x i8> @mla8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) {
8+
; FA64-LABEL: mla8xi8:
9+
; FA64: // %bb.0:
10+
; FA64-NEXT: mla v2.8b, v0.8b, v1.8b
11+
; FA64-NEXT: fmov d0, d2
12+
; FA64-NEXT: ret
13+
;
14+
; NO-FA64-LABEL: mla8xi8:
15+
; NO-FA64: // %bb.0:
16+
; NO-FA64-NEXT: ptrue p0.b, vl8
17+
; NO-FA64-NEXT: // kill: def $d0 killed $d0 def $z0
18+
; NO-FA64-NEXT: // kill: def $d2 killed $d2 def $z2
19+
; NO-FA64-NEXT: // kill: def $d1 killed $d1 def $z1
20+
; NO-FA64-NEXT: mad z0.b, p0/m, z1.b, z2.b
21+
; NO-FA64-NEXT: // kill: def $d0 killed $d0 killed $z0
22+
; NO-FA64-NEXT: ret
23+
%tmp1 = mul <8 x i8> %A, %B;
24+
%tmp2 = add <8 x i8> %C, %tmp1;
25+
ret <8 x i8> %tmp2
26+
}

0 commit comments

Comments
 (0)