Skip to content

Commit 52b83eb

Browse files
openeuler-ci-bot authored and gitee-org committed
!114 [Backport][AArch64][SME] Create new interface for isSVEAvailable.
From: @xiajingze Reviewed-by: @cf-zhao Signed-off-by: @cf-zhao
2 parents c310ea3 + c74896f commit 52b83eb

File tree

6 files changed

+127
-60
lines changed

6 files changed

+127
-60
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1453,7 +1453,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
14531453
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
14541454
setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Custom);
14551455
setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Custom);
1456-
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1456+
if (Subtarget->isSVEAvailable())
1457+
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
14571458
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
14581459
setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
14591460
setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
@@ -1507,9 +1508,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
15071508
setOperationAction(ISD::MUL, MVT::v1i64, Custom);
15081509
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
15091510

1510-
// NEON doesn't support across-vector reductions, but SVE does.
1511-
for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
1512-
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1511+
if (Subtarget->isSVEAvailable()) {
1512+
// NEON doesn't support across-vector reductions, but SVE does.
1513+
for (auto VT :
1514+
{MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
1515+
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1516+
}
15131517

15141518
if (!Subtarget->isNeonAvailable()) {
15151519
setTruncStoreAction(MVT::v2f32, MVT::v2f16, Custom);
@@ -1867,7 +1871,8 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT,
18671871
setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Custom);
18681872
setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Custom);
18691873
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1870-
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1874+
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT,
1875+
StreamingSVE ? Expand : Custom);
18711876
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
18721877
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
18731878
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -484,14 +484,16 @@ void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
484484

485485
bool AArch64Subtarget::useAA() const { return UseAA; }
486486

487-
bool AArch64Subtarget::isNeonAvailable() const {
488-
if (!hasNEON())
489-
return false;
487+
bool AArch64Subtarget::isStreamingCompatible() const {
488+
return StreamingCompatibleSVEMode || ForceStreamingCompatibleSVE;
489+
}
490490

491-
// The 'force-streaming-compatible-sve' flag overrides the streaming
492-
// function attributes.
493-
if (ForceStreamingCompatibleSVE.getNumOccurrences() > 0)
494-
return !ForceStreamingCompatibleSVE;
491+
bool AArch64Subtarget::isNeonAvailable() const {
492+
return hasNEON() && !isStreaming() && !isStreamingCompatible();
493+
}
495494

496-
return !isStreaming() && !isStreamingCompatible();
495+
bool AArch64Subtarget::isSVEAvailable() const{
496+
// FIXME: Also return false if FEAT_FA64 is set, but we can't do this yet
497+
// as we don't yet support the feature in LLVM.
498+
return hasSVE() && !isStreaming() && !isStreamingCompatible();
497499
}

llvm/lib/Target/AArch64/AArch64Subtarget.h

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -205,20 +205,28 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
205205

206206
bool isXRaySupported() const override { return true; }
207207

208-
/// Returns true if the function has the streaming attribute.
208+
/// Returns true if the function has a streaming body.
209209
bool isStreaming() const { return StreamingSVEMode; }
210210

211-
/// Returns true if the function has the streaming-compatible attribute.
212-
bool isStreamingCompatible() const { return StreamingCompatibleSVEMode; }
211+
/// Returns true if the function has a streaming-compatible body.
212+
bool isStreamingCompatible() const;
213213

214214
/// Returns true if the target has NEON and the function at runtime is known
215215
/// to have NEON enabled (e.g. the function is known not to be in streaming-SVE
216216
/// mode, which disables NEON instructions).
217217
bool isNeonAvailable() const;
218218

219+
/// Returns true if the target has SVE and can use the full range of SVE
220+
/// instructions, for example because the function is known not to be
221+
/// in streaming-SVE mode or when the target has FEAT_FA64 enabled.
222+
bool isSVEAvailable() const;
223+
219224
unsigned getMinVectorRegisterBitWidth() const {
220-
// Don't assume any minimum vector size when PSTATE.SM may not be 0.
221-
if (StreamingSVEMode || StreamingCompatibleSVEMode)
225+
// Don't assume any minimum vector size when PSTATE.SM may not be 0, because
226+
// we don't yet have streaming-compatible codegen support that we trust
227+
// is safe for functions that may be executed in streaming-SVE mode.
228+
// By returning '0' here, we disable vectorization.
229+
if (!isSVEAvailable() && !isNeonAvailable())
222230
return 0;
223231
return MinVectorRegisterBitWidth;
224232
}

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1967,8 +1967,7 @@ AArch64TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
19671967

19681968
return TypeSize::getFixed(ST->hasNEON() ? 128 : 0);
19691969
case TargetTransformInfo::RGK_ScalableVector:
1970-
if ((ST->isStreaming() || ST->isStreamingCompatible()) &&
1971-
!EnableScalableAutovecInStreamingMode)
1970+
if (!ST->isSVEAvailable() && !EnableScalableAutovecInStreamingMode)
19721971
return TypeSize::getScalable(0);
19731972

19741973
return TypeSize::getScalable(ST->hasSVE() ? 128 : 0);

llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -mattr=+sve < %s | FileCheck %s
33

4-
; FIXME: Streaming-compatible SVE doesn't include FADDA, so this shouldn't compile!
5-
; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s
4+
; Streaming-compatible SVE doesn't include FADDA, so this shouldn't compile!
5+
; RUN: not --crash llc -mattr=+sve -force-streaming-compatible-sve < %s
66

77
target triple = "aarch64-linux-gnu"
88

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll

Lines changed: 91 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -10,11 +10,14 @@ target triple = "aarch64-unknown-linux-gnu"
1010
define half @fadda_v4f16(half %start, <4 x half> %a) {
1111
; CHECK-LABEL: fadda_v4f16:
1212
; CHECK: // %bb.0:
13-
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
14-
; CHECK-NEXT: ptrue p0.h, vl4
1513
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
16-
; CHECK-NEXT: fadda h0, p0, h0, z1.h
17-
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
14+
; CHECK-NEXT: fadd h0, h0, h1
15+
; CHECK-NEXT: mov z2.h, z1.h[1]
16+
; CHECK-NEXT: fadd h0, h0, h2
17+
; CHECK-NEXT: mov z2.h, z1.h[2]
18+
; CHECK-NEXT: mov z1.h, z1.h[3]
19+
; CHECK-NEXT: fadd h0, h0, h2
20+
; CHECK-NEXT: fadd h0, h0, h1
1821
; CHECK-NEXT: ret
1922
%res = call half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)
2023
ret half %res
@@ -23,11 +26,22 @@ define half @fadda_v4f16(half %start, <4 x half> %a) {
2326
define half @fadda_v8f16(half %start, <8 x half> %a) {
2427
; CHECK-LABEL: fadda_v8f16:
2528
; CHECK: // %bb.0:
26-
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
27-
; CHECK-NEXT: ptrue p0.h, vl8
2829
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
29-
; CHECK-NEXT: fadda h0, p0, h0, z1.h
30-
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
30+
; CHECK-NEXT: fadd h0, h0, h1
31+
; CHECK-NEXT: mov z2.h, z1.h[1]
32+
; CHECK-NEXT: fadd h0, h0, h2
33+
; CHECK-NEXT: mov z2.h, z1.h[2]
34+
; CHECK-NEXT: fadd h0, h0, h2
35+
; CHECK-NEXT: mov z2.h, z1.h[3]
36+
; CHECK-NEXT: fadd h0, h0, h2
37+
; CHECK-NEXT: mov z2.h, z1.h[4]
38+
; CHECK-NEXT: fadd h0, h0, h2
39+
; CHECK-NEXT: mov z2.h, z1.h[5]
40+
; CHECK-NEXT: fadd h0, h0, h2
41+
; CHECK-NEXT: mov z2.h, z1.h[6]
42+
; CHECK-NEXT: mov z1.h, z1.h[7]
43+
; CHECK-NEXT: fadd h0, h0, h2
44+
; CHECK-NEXT: fadd h0, h0, h1
3145
; CHECK-NEXT: ret
3246
%res = call half @llvm.vector.reduce.fadd.v8f16(half %start, <8 x half> %a)
3347
ret half %res
@@ -36,12 +50,38 @@ define half @fadda_v8f16(half %start, <8 x half> %a) {
3650
define half @fadda_v16f16(half %start, ptr %a) {
3751
; CHECK-LABEL: fadda_v16f16:
3852
; CHECK: // %bb.0:
39-
; CHECK-NEXT: ldp q1, q2, [x0]
40-
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
41-
; CHECK-NEXT: ptrue p0.h, vl8
42-
; CHECK-NEXT: fadda h0, p0, h0, z1.h
43-
; CHECK-NEXT: fadda h0, p0, h0, z2.h
44-
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
53+
; CHECK-NEXT: ldr q1, [x0]
54+
; CHECK-NEXT: fadd h0, h0, h1
55+
; CHECK-NEXT: mov z2.h, z1.h[1]
56+
; CHECK-NEXT: fadd h0, h0, h2
57+
; CHECK-NEXT: mov z2.h, z1.h[2]
58+
; CHECK-NEXT: fadd h0, h0, h2
59+
; CHECK-NEXT: mov z2.h, z1.h[3]
60+
; CHECK-NEXT: fadd h0, h0, h2
61+
; CHECK-NEXT: mov z2.h, z1.h[4]
62+
; CHECK-NEXT: fadd h0, h0, h2
63+
; CHECK-NEXT: mov z2.h, z1.h[5]
64+
; CHECK-NEXT: fadd h0, h0, h2
65+
; CHECK-NEXT: mov z2.h, z1.h[6]
66+
; CHECK-NEXT: mov z1.h, z1.h[7]
67+
; CHECK-NEXT: fadd h0, h0, h2
68+
; CHECK-NEXT: fadd h0, h0, h1
69+
; CHECK-NEXT: ldr q1, [x0, #16]
70+
; CHECK-NEXT: mov z2.h, z1.h[1]
71+
; CHECK-NEXT: fadd h0, h0, h1
72+
; CHECK-NEXT: fadd h0, h0, h2
73+
; CHECK-NEXT: mov z2.h, z1.h[2]
74+
; CHECK-NEXT: fadd h0, h0, h2
75+
; CHECK-NEXT: mov z2.h, z1.h[3]
76+
; CHECK-NEXT: fadd h0, h0, h2
77+
; CHECK-NEXT: mov z2.h, z1.h[4]
78+
; CHECK-NEXT: fadd h0, h0, h2
79+
; CHECK-NEXT: mov z2.h, z1.h[5]
80+
; CHECK-NEXT: fadd h0, h0, h2
81+
; CHECK-NEXT: mov z2.h, z1.h[6]
82+
; CHECK-NEXT: mov z1.h, z1.h[7]
83+
; CHECK-NEXT: fadd h0, h0, h2
84+
; CHECK-NEXT: fadd h0, h0, h1
4585
; CHECK-NEXT: ret
4686
%op = load <16 x half>, ptr %a
4787
%res = call half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op)
@@ -51,11 +91,10 @@ define half @fadda_v16f16(half %start, ptr %a) {
5191
define float @fadda_v2f32(float %start, <2 x float> %a) {
5292
; CHECK-LABEL: fadda_v2f32:
5393
; CHECK: // %bb.0:
54-
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
55-
; CHECK-NEXT: ptrue p0.s, vl2
5694
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
57-
; CHECK-NEXT: fadda s0, p0, s0, z1.s
58-
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
95+
; CHECK-NEXT: fadd s0, s0, s1
96+
; CHECK-NEXT: mov z1.s, z1.s[1]
97+
; CHECK-NEXT: fadd s0, s0, s1
5998
; CHECK-NEXT: ret
6099
%res = call float @llvm.vector.reduce.fadd.v2f32(float %start, <2 x float> %a)
61100
ret float %res
@@ -64,11 +103,14 @@ define float @fadda_v2f32(float %start, <2 x float> %a) {
64103
define float @fadda_v4f32(float %start, <4 x float> %a) {
65104
; CHECK-LABEL: fadda_v4f32:
66105
; CHECK: // %bb.0:
67-
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
68-
; CHECK-NEXT: ptrue p0.s, vl4
69106
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
70-
; CHECK-NEXT: fadda s0, p0, s0, z1.s
71-
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
107+
; CHECK-NEXT: fadd s0, s0, s1
108+
; CHECK-NEXT: mov z2.s, z1.s[1]
109+
; CHECK-NEXT: fadd s0, s0, s2
110+
; CHECK-NEXT: mov z2.s, z1.s[2]
111+
; CHECK-NEXT: mov z1.s, z1.s[3]
112+
; CHECK-NEXT: fadd s0, s0, s2
113+
; CHECK-NEXT: fadd s0, s0, s1
72114
; CHECK-NEXT: ret
73115
%res = call float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %a)
74116
ret float %res
@@ -77,12 +119,22 @@ define float @fadda_v4f32(float %start, <4 x float> %a) {
77119
define float @fadda_v8f32(float %start, ptr %a) {
78120
; CHECK-LABEL: fadda_v8f32:
79121
; CHECK: // %bb.0:
80-
; CHECK-NEXT: ldp q1, q2, [x0]
81-
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
82-
; CHECK-NEXT: ptrue p0.s, vl4
83-
; CHECK-NEXT: fadda s0, p0, s0, z1.s
84-
; CHECK-NEXT: fadda s0, p0, s0, z2.s
85-
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
122+
; CHECK-NEXT: ldr q1, [x0]
123+
; CHECK-NEXT: fadd s0, s0, s1
124+
; CHECK-NEXT: mov z2.s, z1.s[1]
125+
; CHECK-NEXT: fadd s0, s0, s2
126+
; CHECK-NEXT: mov z2.s, z1.s[2]
127+
; CHECK-NEXT: mov z1.s, z1.s[3]
128+
; CHECK-NEXT: fadd s0, s0, s2
129+
; CHECK-NEXT: fadd s0, s0, s1
130+
; CHECK-NEXT: ldr q1, [x0, #16]
131+
; CHECK-NEXT: mov z2.s, z1.s[1]
132+
; CHECK-NEXT: fadd s0, s0, s1
133+
; CHECK-NEXT: fadd s0, s0, s2
134+
; CHECK-NEXT: mov z2.s, z1.s[2]
135+
; CHECK-NEXT: mov z1.s, z1.s[3]
136+
; CHECK-NEXT: fadd s0, s0, s2
137+
; CHECK-NEXT: fadd s0, s0, s1
86138
; CHECK-NEXT: ret
87139
%op = load <8 x float>, ptr %a
88140
%res = call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op)
@@ -102,11 +154,10 @@ define double @fadda_v1f64(double %start, <1 x double> %a) {
102154
define double @fadda_v2f64(double %start, <2 x double> %a) {
103155
; CHECK-LABEL: fadda_v2f64:
104156
; CHECK: // %bb.0:
105-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
106-
; CHECK-NEXT: ptrue p0.d, vl2
107157
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
108-
; CHECK-NEXT: fadda d0, p0, d0, z1.d
109-
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
158+
; CHECK-NEXT: fadd d0, d0, d1
159+
; CHECK-NEXT: mov z1.d, z1.d[1]
160+
; CHECK-NEXT: fadd d0, d0, d1
110161
; CHECK-NEXT: ret
111162
%res = call double @llvm.vector.reduce.fadd.v2f64(double %start, <2 x double> %a)
112163
ret double %res
@@ -115,12 +166,14 @@ define double @fadda_v2f64(double %start, <2 x double> %a) {
115166
define double @fadda_v4f64(double %start, ptr %a) {
116167
; CHECK-LABEL: fadda_v4f64:
117168
; CHECK: // %bb.0:
118-
; CHECK-NEXT: ldp q1, q2, [x0]
119-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
120-
; CHECK-NEXT: ptrue p0.d, vl2
121-
; CHECK-NEXT: fadda d0, p0, d0, z1.d
122-
; CHECK-NEXT: fadda d0, p0, d0, z2.d
123-
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
169+
; CHECK-NEXT: ldr q1, [x0]
170+
; CHECK-NEXT: fadd d0, d0, d1
171+
; CHECK-NEXT: mov z1.d, z1.d[1]
172+
; CHECK-NEXT: fadd d0, d0, d1
173+
; CHECK-NEXT: ldr q1, [x0, #16]
174+
; CHECK-NEXT: fadd d0, d0, d1
175+
; CHECK-NEXT: mov z1.d, z1.d[1]
176+
; CHECK-NEXT: fadd d0, d0, d1
124177
; CHECK-NEXT: ret
125178
%op = load <4 x double>, ptr %a
126179
%res = call double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op)

0 commit comments

Comments
 (0)