Skip to content

Commit af48791

Browse files
committed
ZA commit cond: move the TPIDR2_EL0 clear and the ZA zeroing into the conditional CommitZA expansion
Change-Id: I149c3d7d14713bd75993760e4155d07a276853da
1 parent af0c8b4 commit af48791

File tree

6 files changed

+37
-37
lines changed

6 files changed

+37
-37
lines changed

llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -990,6 +990,8 @@ bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
990990
return true;
991991
}
992992

993+
static constexpr unsigned ZERO_ALL_ZA_MASK = 0b11111111;
994+
993995
MachineBasicBlock *
994996
AArch64ExpandPseudo::expandCommitOrRestoreZA(MachineBasicBlock &MBB,
995997
MachineBasicBlock::iterator MBBI) {
@@ -1029,8 +1031,21 @@ AArch64ExpandPseudo::expandCommitOrRestoreZA(MachineBasicBlock &MBB,
10291031
// Replace the pseudo with a call (BL).
10301032
MachineInstrBuilder MIB =
10311033
BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::BL));
1032-
if (IsRestoreZA)
1034+
if (IsRestoreZA) {
10331035
MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit);
1036+
} else /*CommitZA*/ {
1037+
auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1038+
// Clear TPIDR2_EL0.
1039+
BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::MSR))
1040+
.addImm(AArch64SysReg::TPIDR2_EL0)
1041+
.addReg(AArch64::XZR);
1042+
bool ZeroZA = MI.definesRegister(AArch64::ZAB0, TRI);
1043+
if (ZeroZA) {
1044+
BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::ZERO_M))
1045+
.addImm(ZERO_ALL_ZA_MASK)
1046+
.addDef(AArch64::ZAB0, RegState::ImplicitDefine);
1047+
}
1048+
}
10341049
unsigned FirstBLOperand = IsRestoreZA ? 2 : 1;
10351050
for (unsigned I = FirstBLOperand; I < MI.getNumOperands(); ++I)
10361051
MIB.add(MI.getOperand(I));

llvm/lib/Target/AArch64/MachineSMEABIPass.cpp

Lines changed: 8 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -534,18 +534,6 @@ void MachineSMEABI::emitAllocateLazySaveBuffer(
534534
}
535535
}
536536

537-
static constexpr unsigned ZERO_ALL_ZA_MASK = 0b11111111;
538-
static void emitZeroZA(const TargetInstrInfo &TII, DebugLoc DL,
539-
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
540-
unsigned Mask = ZERO_ALL_ZA_MASK) {
541-
MachineInstrBuilder MIB =
542-
BuildMI(MBB, MBBI, DL, TII.get(AArch64::ZERO_M)).addImm(Mask);
543-
for (unsigned I = 0; I < 8; I++) {
544-
if (Mask & (1 << I))
545-
MIB.addDef(AArch64::ZAD0 + I, RegState::ImplicitDefine);
546-
}
547-
}
548-
549537
void MachineSMEABI::emitNewZAPrologue(MachineBasicBlock &MBB,
550538
MachineBasicBlock::iterator MBBI) {
551539
auto *TLI = Subtarget->getTargetLowering();
@@ -557,21 +545,18 @@ void MachineSMEABI::emitNewZAPrologue(MachineBasicBlock &MBB,
557545
.addReg(TPIDR2EL0, RegState::Define)
558546
.addImm(AArch64SysReg::TPIDR2_EL0);
559547
// If TPIDR2_EL0 is non-zero, commit the lazy save.
560-
BuildMI(MBB, MBBI, DL, TII->get(AArch64::CommitZAPseudo))
561-
.addReg(TPIDR2EL0)
562-
.addExternalSymbol(TLI->getLibcallName(RTLIB::SMEABI_TPIDR2_SAVE))
563-
.addRegMask(TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
564-
// Clear TPIDR2_EL0.
565-
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
566-
.addImm(AArch64SysReg::TPIDR2_EL0)
567-
.addReg(AArch64::XZR);
548+
auto CommitZA =
549+
BuildMI(MBB, MBBI, DL, TII->get(AArch64::CommitZAPseudo))
550+
.addReg(TPIDR2EL0)
551+
.addExternalSymbol(TLI->getLibcallName(RTLIB::SMEABI_TPIDR2_SAVE))
552+
.addRegMask(TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
553+
// NOTE: Functions that only use ZT0 don't need to zero ZA.
554+
if (MF->getInfo<AArch64FunctionInfo>()->getSMEFnAttrs().hasZAState())
555+
CommitZA.addDef(AArch64::ZAB0, RegState::ImplicitDefine);
568556
// Enable ZA (as ZA could have previously been in the OFF state).
569557
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSRpstatesvcrImm1))
570558
.addImm(AArch64SVCR::SVCRZA)
571559
.addImm(1);
572-
// NOTE: Functions that only use ZT0 don't need to zero ZA.
573-
if (MF->getInfo<AArch64FunctionInfo>()->getSMEFnAttrs().hasZAState())
574-
emitZeroZA(*TII, DL, MBB, MBBI);
575560
}
576561

577562
void MachineSMEABI::emitStateChange(MachineBasicBlock &MBB,

llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -395,11 +395,11 @@ define void @test_lazy_save_mixed_shared_and_private_callees() "aarch64_new_za"
395395
; CHECK-NEWLOWERING-NEXT: cbz x8, .LBB4_2
396396
; CHECK-NEWLOWERING-NEXT: // %bb.1:
397397
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_save
398-
; CHECK-NEWLOWERING-NEXT: .LBB4_2:
399398
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
399+
; CHECK-NEWLOWERING-NEXT: zero {za}
400+
; CHECK-NEWLOWERING-NEXT: .LBB4_2:
400401
; CHECK-NEWLOWERING-NEXT: smstart za
401402
; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16
402-
; CHECK-NEWLOWERING-NEXT: zero {za}
403403
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
404404
; CHECK-NEWLOWERING-NEXT: bl private_za_callee
405405
; CHECK-NEWLOWERING-NEXT: smstart za

llvm/test/CodeGen/AArch64/sme-new-za-function.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@ define void @private_za() "aarch64_new_za" {
3737
; CHECK-NEWLOWERING-NEXT: b .LBB0_2
3838
; CHECK-NEWLOWERING-NEXT: .LBB0_1:
3939
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_save
40+
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
41+
; CHECK-NEWLOWERING-NEXT: zero {za}
4042
; CHECK-NEWLOWERING-NEXT: b .LBB0_2
4143
; CHECK-NEWLOWERING-NEXT: .LBB0_2:
42-
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
4344
; CHECK-NEWLOWERING-NEXT: smstart za
44-
; CHECK-NEWLOWERING-NEXT: zero {za}
4545
; CHECK-NEWLOWERING-NEXT: bl shared_za_callee
4646
; CHECK-NEWLOWERING-NEXT: smstop za
4747
; CHECK-NEWLOWERING-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
@@ -104,11 +104,11 @@ define i32 @private_za_multiple_exit(i32 %a, i32 %b, i64 %cond) "aarch64_new_za"
104104
; CHECK-NEWLOWERING-NEXT: b .LBB1_2
105105
; CHECK-NEWLOWERING-NEXT: .LBB1_1: // %entry
106106
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_save
107+
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
108+
; CHECK-NEWLOWERING-NEXT: zero {za}
107109
; CHECK-NEWLOWERING-NEXT: b .LBB1_2
108110
; CHECK-NEWLOWERING-NEXT: .LBB1_2: // %entry
109-
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
110111
; CHECK-NEWLOWERING-NEXT: smstart za
111-
; CHECK-NEWLOWERING-NEXT: zero {za}
112112
; CHECK-NEWLOWERING-NEXT: str w1, [sp, #8] // 4-byte Folded Spill
113113
; CHECK-NEWLOWERING-NEXT: str w0, [sp, #12] // 4-byte Folded Spill
114114
; CHECK-NEWLOWERING-NEXT: subs x8, x2, #1

llvm/test/CodeGen/AArch64/sme-za-exceptions.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,10 +233,10 @@ define void @try_catch_shared_za_callee() "aarch64_new_za" personality ptr @__gx
233233
; CHECK-NEXT: cbz x8, .LBB2_2
234234
; CHECK-NEXT: // %bb.1:
235235
; CHECK-NEXT: bl __arm_tpidr2_save
236-
; CHECK-NEXT: .LBB2_2:
237236
; CHECK-NEXT: msr TPIDR2_EL0, xzr
238-
; CHECK-NEXT: smstart za
239237
; CHECK-NEXT: zero {za}
238+
; CHECK-NEXT: .LBB2_2:
239+
; CHECK-NEXT: smstart za
240240
; CHECK-NEXT: .Ltmp6:
241241
; CHECK-NEXT: bl shared_za_call
242242
; CHECK-NEXT: .Ltmp7:

llvm/test/CodeGen/AArch64/sme-zt0-state.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -201,8 +201,8 @@ define void @zt0_new_caller_zt0_new_callee(ptr %callee) "aarch64_new_zt0" nounwi
201201
; CHECK-NEWLOWERING-NEXT: cbz x8, .LBB6_2
202202
; CHECK-NEWLOWERING-NEXT: // %bb.1:
203203
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_save
204-
; CHECK-NEWLOWERING-NEXT: .LBB6_2:
205204
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
205+
; CHECK-NEWLOWERING-NEXT: .LBB6_2:
206206
; CHECK-NEWLOWERING-NEXT: smstart za
207207
; CHECK-NEWLOWERING-NEXT: zero { zt0 }
208208
; CHECK-NEWLOWERING-NEXT: mov x19, sp
@@ -254,8 +254,8 @@ define i64 @zt0_new_caller_abi_routine_callee() "aarch64_new_zt0" nounwind {
254254
; CHECK-NEWLOWERING-NEXT: cbz x8, .LBB7_2
255255
; CHECK-NEWLOWERING-NEXT: // %bb.1:
256256
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_save
257-
; CHECK-NEWLOWERING-NEXT: .LBB7_2:
258257
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
258+
; CHECK-NEWLOWERING-NEXT: .LBB7_2:
259259
; CHECK-NEWLOWERING-NEXT: smstart za
260260
; CHECK-NEWLOWERING-NEXT: zero { zt0 }
261261
; CHECK-NEWLOWERING-NEXT: mov x19, sp
@@ -304,8 +304,8 @@ define void @zt0_new_caller(ptr %callee) "aarch64_new_zt0" nounwind {
304304
; CHECK-NEWLOWERING-NEXT: cbz x8, .LBB8_2
305305
; CHECK-NEWLOWERING-NEXT: // %bb.1:
306306
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_save
307-
; CHECK-NEWLOWERING-NEXT: .LBB8_2:
308307
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
308+
; CHECK-NEWLOWERING-NEXT: .LBB8_2:
309309
; CHECK-NEWLOWERING-NEXT: smstart za
310310
; CHECK-NEWLOWERING-NEXT: zero { zt0 }
311311
; CHECK-NEWLOWERING-NEXT: blr x0
@@ -344,10 +344,10 @@ define void @new_za_zt0_caller(ptr %callee) "aarch64_new_za" "aarch64_new_zt0" n
344344
; CHECK-NEWLOWERING-NEXT: cbz x8, .LBB9_2
345345
; CHECK-NEWLOWERING-NEXT: // %bb.1:
346346
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_save
347-
; CHECK-NEWLOWERING-NEXT: .LBB9_2:
348347
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
349-
; CHECK-NEWLOWERING-NEXT: smstart za
350348
; CHECK-NEWLOWERING-NEXT: zero {za}
349+
; CHECK-NEWLOWERING-NEXT: .LBB9_2:
350+
; CHECK-NEWLOWERING-NEXT: smstart za
351351
; CHECK-NEWLOWERING-NEXT: zero { zt0 }
352352
; CHECK-NEWLOWERING-NEXT: blr x0
353353
; CHECK-NEWLOWERING-NEXT: smstop za

0 commit comments

Comments
 (0)