Skip to content

Commit ad3c72b

Browse files
committed
[AArch64] Observe Z-reg inline asm clobbers without SVE
When a function on a target without SVE is not in streaming mode, inline asm that clobbers any of the Z-registers should still be observed as clobbering the lower 128 bits of those registers.
1 parent 4e44166 commit ad3c72b

File tree

2 files changed

+106
-10
lines changed

2 files changed

+106
-10
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12248,21 +12248,24 @@ enum class PredicateConstraint { Uph, Upl, Upa };
1224812248
// not what we want. The code here pre-empts this by matching the register
1224912249
// explicitly.
1225012250
static std::optional<std::pair<unsigned, const TargetRegisterClass *>>
12251-
parsePredicateRegAsConstraint(StringRef Constraint) {
12251+
parseSVERegAsConstraint(StringRef Constraint) {
1225212252
if (!Constraint.starts_with('{') || !Constraint.ends_with('}') ||
12253-
Constraint[1] != 'p')
12253+
(Constraint[1] != 'p' && Constraint[1] != 'z'))
1225412254
return std::nullopt;
1225512255

12256+
bool IsPredicate = Constraint[1] == 'p';
1225612257
Constraint = Constraint.substr(2, Constraint.size() - 3);
12257-
bool IsPredicateAsCount = Constraint.starts_with("n");
12258+
bool IsPredicateAsCount = IsPredicate && Constraint.starts_with("n");
1225812259
if (IsPredicateAsCount)
1225912260
Constraint = Constraint.drop_front(1);
1226012261

1226112262
unsigned V;
1226212263
if (Constraint.getAsInteger(10, V) || V > 31)
1226312264
return std::nullopt;
1226412265

12265-
if (IsPredicateAsCount)
12266+
if (!IsPredicate)
12267+
return std::make_pair(AArch64::Z0 + V, &AArch64::ZPRRegClass);
12268+
else if (IsPredicateAsCount)
1226612269
return std::make_pair(AArch64::PN0 + V, &AArch64::PNRRegClass);
1226712270
else
1226812271
return std::make_pair(AArch64::P0 + V, &AArch64::PPRRegClass);
@@ -12515,8 +12518,18 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
1251512518
break;
1251612519
}
1251712520
} else {
12518-
if (const auto P = parsePredicateRegAsConstraint(Constraint))
12519-
return *P;
12521+
if (const auto P = parseSVERegAsConstraint(Constraint)) {
12522+
// SME functions that are not in streaming mode should
12523+
// still observe clobbers of Z-registers by clobbering
12524+
// the lower 128 bits of those registers.
12525+
if (AArch64::ZPRRegClass.hasSubClassEq(P->second) &&
12526+
!Subtarget->hasSVE() && Subtarget->hasSME() &&
12527+
!Subtarget->isStreaming())
12528+
return std::make_pair(TRI->getSubReg(P->first, AArch64::zsub),
12529+
&AArch64::FPR128RegClass);
12530+
else
12531+
return *P;
12532+
}
1252012533
if (const auto PC = parsePredicateConstraint(Constraint))
1252112534
if (const auto *RegClass = getPredicateRegisterClass(*PC, VT))
1252212535
return std::make_pair(0U, RegClass);

llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll

Lines changed: 87 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
; RUN: llc < %s -mtriple aarch64-none-linux-gnu -mattr=+sme2 -force-streaming -stop-after=finalize-isel | FileCheck %s
1+
; RUN: llc < %s -mtriple aarch64-none-linux-gnu -stop-after=finalize-isel | FileCheck %s
22

3-
define void @UphPNR(target("aarch64.svcount") %predcnt) {
3+
define void @UphPNR(target("aarch64.svcount") %predcnt) "target-features"="+sme2" "aarch64_pstate_sm_enabled" {
44
entry:
55
; CHECK: %0:ppr = COPY $p0
66
; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store (<vscale x 1 x s16>) into %ir.predcnt.addr)
@@ -14,7 +14,7 @@ entry:
1414
ret void
1515
}
1616

17-
define void @UpaPNR(target("aarch64.svcount") %predcnt) {
17+
define void @UpaPNR(target("aarch64.svcount") %predcnt) "target-features"="+sme2" "aarch64_pstate_sm_enabled" {
1818
entry:
1919
; CHECK: %0:ppr = COPY $p0
2020
; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store (<vscale x 1 x s16>) into %ir.predcnt.addr)
@@ -28,7 +28,7 @@ entry:
2828
ret void
2929
}
3030

31-
define void @UplPNR(target("aarch64.svcount") %predcnt) {
31+
define void @UplPNR(target("aarch64.svcount") %predcnt) "target-features"="+sme2" "aarch64_pstate_sm_enabled" {
3232
entry:
3333
; CHECK: %0:ppr = COPY $p0
3434
; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store (<vscale x 1 x s16>) into %ir.predcnt.addr)
@@ -41,3 +41,86 @@ entry:
4141
call void asm sideeffect "fadd z0.h, $0/m, z0.h, #0.5", "@3Upl"(target("aarch64.svcount") %0)
4242
ret void
4343
}
44+
45+
; Test that the z-register clobbers result in preserving %0 across the inline asm call.
46+
define <2 x float> @sme_nosve_nonstreaming(ptr %in) "target-features"="+sme,-sve" {
47+
entry:
48+
; CHECK-LABEL: name: sme_nosve_nonstreaming
49+
; CHECK: INLINEASM &"smstart sm; smstop sm;"
50+
; CHECK-SAME: implicit-def early-clobber $q0
51+
; CHECK-SAME: implicit-def early-clobber $q1
52+
; CHECK-SAME: implicit-def early-clobber $q2
53+
; CHECK-SAME: implicit-def early-clobber $q3
54+
; CHECK-SAME: implicit-def early-clobber $q4
55+
; CHECK-SAME: implicit-def early-clobber $q5
56+
; CHECK-SAME: implicit-def early-clobber $q6
57+
; CHECK-SAME: implicit-def early-clobber $q7
58+
; CHECK-SAME: implicit-def early-clobber $q8
59+
; CHECK-SAME: implicit-def early-clobber $q9
60+
; CHECK-SAME: implicit-def early-clobber $q10
61+
; CHECK-SAME: implicit-def early-clobber $q11
62+
; CHECK-SAME: implicit-def early-clobber $q12
63+
; CHECK-SAME: implicit-def early-clobber $q13
64+
; CHECK-SAME: implicit-def early-clobber $q14
65+
; CHECK-SAME: implicit-def early-clobber $q15
66+
; CHECK-SAME: implicit-def early-clobber $q16
67+
; CHECK-SAME: implicit-def early-clobber $q17
68+
; CHECK-SAME: implicit-def early-clobber $q18
69+
; CHECK-SAME: implicit-def early-clobber $q19
70+
; CHECK-SAME: implicit-def early-clobber $q20
71+
; CHECK-SAME: implicit-def early-clobber $q21
72+
; CHECK-SAME: implicit-def early-clobber $q22
73+
; CHECK-SAME: implicit-def early-clobber $q23
74+
; CHECK-SAME: implicit-def early-clobber $q24
75+
; CHECK-SAME: implicit-def early-clobber $q25
76+
; CHECK-SAME: implicit-def early-clobber $q26
77+
; CHECK-SAME: implicit-def early-clobber $q27
78+
; CHECK-SAME: implicit-def early-clobber $q28
79+
; CHECK-SAME: implicit-def early-clobber $q29
80+
; CHECK-SAME: implicit-def early-clobber $q30
81+
; CHECK-SAME: implicit-def early-clobber $q31
82+
%0 = load <2 x float>, ptr %in, align 8
83+
call void asm sideeffect "smstart sm; smstop sm;", "~{z0},~{z1},~{z2},~{z3},~{z4},~{z5},~{z6},~{z7},~{z8},~{z9},~{z10},~{z11},~{z12},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23},~{z24},~{z25},~{z26},~{z27},~{z28},~{z29},~{z30},~{z31}"()
84+
ret <2 x float> %0
85+
}
86+
87+
define <2 x float> @sme_nosve_streaming(ptr %in) "target-features"="+sme,-sve" "aarch64_pstate_sm_enabled" {
88+
entry:
89+
; CHECK-LABEL: name: sme_nosve_streaming
90+
; CHECK: INLINEASM &"smstart sm; smstop sm;"
91+
; CHECK-SAME: implicit-def early-clobber $z0
92+
; CHECK-SAME: implicit-def early-clobber $z1
93+
; CHECK-SAME: implicit-def early-clobber $z2
94+
; CHECK-SAME: implicit-def early-clobber $z3
95+
; CHECK-SAME: implicit-def early-clobber $z4
96+
; CHECK-SAME: implicit-def early-clobber $z5
97+
; CHECK-SAME: implicit-def early-clobber $z6
98+
; CHECK-SAME: implicit-def early-clobber $z7
99+
; CHECK-SAME: implicit-def early-clobber $z8
100+
; CHECK-SAME: implicit-def early-clobber $z9
101+
; CHECK-SAME: implicit-def early-clobber $z10
102+
; CHECK-SAME: implicit-def early-clobber $z11
103+
; CHECK-SAME: implicit-def early-clobber $z12
104+
; CHECK-SAME: implicit-def early-clobber $z13
105+
; CHECK-SAME: implicit-def early-clobber $z14
106+
; CHECK-SAME: implicit-def early-clobber $z15
107+
; CHECK-SAME: implicit-def early-clobber $z16
108+
; CHECK-SAME: implicit-def early-clobber $z17
109+
; CHECK-SAME: implicit-def early-clobber $z18
110+
; CHECK-SAME: implicit-def early-clobber $z19
111+
; CHECK-SAME: implicit-def early-clobber $z20
112+
; CHECK-SAME: implicit-def early-clobber $z21
113+
; CHECK-SAME: implicit-def early-clobber $z22
114+
; CHECK-SAME: implicit-def early-clobber $z23
115+
; CHECK-SAME: implicit-def early-clobber $z24
116+
; CHECK-SAME: implicit-def early-clobber $z25
117+
; CHECK-SAME: implicit-def early-clobber $z26
118+
; CHECK-SAME: implicit-def early-clobber $z27
119+
; CHECK-SAME: implicit-def early-clobber $z28
120+
; CHECK-SAME: implicit-def early-clobber $z29
121+
; CHECK-SAME: implicit-def early-clobber $z30
122+
; CHECK-SAME: implicit-def early-clobber $z31
123+
%0 = load <2 x float>, ptr %in, align 8
124+
call void asm sideeffect "smstart sm; smstop sm;", "~{z0},~{z1},~{z2},~{z3},~{z4},~{z5},~{z6},~{z7},~{z8},~{z9},~{z10},~{z11},~{z12},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23},~{z24},~{z25},~{z26},~{z27},~{z28},~{z29},~{z30},~{z31}"()
125+
ret <2 x float> %0
126+
}

0 commit comments

Comments
 (0)