Skip to content

Commit 818b39e

Browse files
authored
[AArch64][SME] Simplify initialization of the TPIDR2 block (#141049)
This patch updates the definition of `AArch64ISD::INIT_TPIDR2OBJ` to take the number of save slices (which is currently always all ZA slices). Using this, we can initialize the TPIDR2 block with a single STP of the save buffer pointer and the number of save slices. The reserved bytes (10-15) are implicitly zeroed because the result of RDSVL always fits in 16 bits, so (on little-endian targets) the upper 48 bits of the 64-bit value stored at offset 8 are zero. Note: We used to write the number of save slices to the TPIDR2 block before every call with a lazy save; however, based on section 6.6.9 "Changes to the TPIDR2 block" of the AAPCS64 (https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#changes-to-the-tpidr2-block), it seems we can rely on the contents of the TPIDR2 block being preserved across calls.
1 parent 88f8ab0 commit 818b39e

File tree

10 files changed

+392
-767
lines changed

10 files changed

+392
-767
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2984,21 +2984,20 @@ AArch64TargetLowering::EmitInitTPIDR2Object(MachineInstr &MI,
29842984
AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
29852985
TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
29862986
if (TPIDR2.Uses > 0) {
2987+
// Note: This case just needs to do `SVL << 48`. It is not implemented as we
2988+
// generally don't support big-endian SVE/SME.
2989+
if (!Subtarget->isLittleEndian())
2990+
reportFatalInternalError(
2991+
"TPIDR2 block initialization is not supported on big-endian targets");
2992+
29872993
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2988-
// Store the buffer pointer to the TPIDR2 stack object.
2989-
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRXui))
2994+
// Store buffer pointer and num_za_save_slices.
2995+
// Bytes 10-15 are implicitly zeroed.
2996+
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STPXi))
29902997
.addReg(MI.getOperand(0).getReg())
2998+
.addReg(MI.getOperand(1).getReg())
29912999
.addFrameIndex(TPIDR2.FrameIndex)
29923000
.addImm(0);
2993-
// Set the reserved bytes (10-15) to zero
2994-
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRHHui))
2995-
.addReg(AArch64::WZR)
2996-
.addFrameIndex(TPIDR2.FrameIndex)
2997-
.addImm(5);
2998-
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRWui))
2999-
.addReg(AArch64::WZR)
3000-
.addFrameIndex(TPIDR2.FrameIndex)
3001-
.addImm(3);
30023001
} else
30033002
MFI.RemoveStackObject(TPIDR2.FrameIndex);
30043003

@@ -8313,9 +8312,12 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
83138312
{Chain, Size, DAG.getConstant(1, DL, MVT::i64)});
83148313
MFI.CreateVariableSizedObject(Align(16), nullptr);
83158314
}
8315+
SDValue NumZaSaveSlices = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
8316+
DAG.getConstant(1, DL, MVT::i32));
83168317
Chain = DAG.getNode(
83178318
AArch64ISD::INIT_TPIDR2OBJ, DL, DAG.getVTList(MVT::Other),
8318-
{/*Chain*/ Buffer.getValue(1), /*Buffer ptr*/ Buffer.getValue(0)});
8319+
{/*Chain*/ Buffer.getValue(1), /*Buffer ptr*/ Buffer.getValue(0),
8320+
/*Num save slices*/ NumZaSaveSlices});
83198321
} else if (Attrs.hasAgnosticZAInterface()) {
83208322
// Call __arm_sme_state_size().
83218323
SDValue BufferSize =
@@ -9165,19 +9167,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
91659167
bool RequiresLazySave = !UseNewSMEABILowering && CallAttrs.requiresLazySave();
91669168
bool RequiresSaveAllZA = CallAttrs.requiresPreservingAllZAState();
91679169
if (RequiresLazySave) {
9168-
const TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
9169-
MachinePointerInfo MPI =
9170-
MachinePointerInfo::getStack(MF, TPIDR2.FrameIndex);
9170+
TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
91719171
SDValue TPIDR2ObjAddr = DAG.getFrameIndex(
91729172
TPIDR2.FrameIndex,
91739173
DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
9174-
SDValue NumZaSaveSlicesAddr =
9175-
DAG.getNode(ISD::ADD, DL, TPIDR2ObjAddr.getValueType(), TPIDR2ObjAddr,
9176-
DAG.getConstant(8, DL, TPIDR2ObjAddr.getValueType()));
9177-
SDValue NumZaSaveSlices = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
9178-
DAG.getConstant(1, DL, MVT::i32));
9179-
Chain = DAG.getTruncStore(Chain, DL, NumZaSaveSlices, NumZaSaveSlicesAddr,
9180-
MPI, MVT::i16);
91819174
Chain = DAG.getNode(
91829175
ISD::INTRINSIC_VOID, DL, MVT::Other, Chain,
91839176
DAG.getConstant(Intrinsic::aarch64_sme_set_tpidr2, DL, MVT::i32),

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,10 @@ let usesCustomInserter = 1, Defs = [SP], Uses = [SP] in {
6161
def : Pat<(i64 (AArch64AllocateZABuffer GPR64:$size)),
6262
(AllocateZABuffer $size)>;
6363

64-
def AArch64InitTPIDR2Obj : SDNode<"AArch64ISD::INIT_TPIDR2OBJ", SDTypeProfile<0, 1,
65-
[SDTCisInt<0>]>, [SDNPHasChain, SDNPMayStore]>;
64+
def AArch64InitTPIDR2Obj : SDNode<"AArch64ISD::INIT_TPIDR2OBJ", SDTypeProfile<0, 2,
65+
[SDTCisInt<0>, SDTCisInt<1>]>, [SDNPHasChain, SDNPMayStore]>;
6666
let usesCustomInserter = 1 in {
67-
def InitTPIDR2Obj : Pseudo<(outs), (ins GPR64:$buffer), [(AArch64InitTPIDR2Obj GPR64:$buffer)]>, Sched<[WriteI]> {}
67+
def InitTPIDR2Obj : Pseudo<(outs), (ins GPR64:$buffer, GPR64:$save_slices), [(AArch64InitTPIDR2Obj GPR64:$buffer, GPR64:$save_slices)]>, Sched<[WriteI]> {}
6868
}
6969

7070
// Nodes to allocate a save buffer for SME.

llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -250,10 +250,7 @@ define double @za_shared_caller_to_za_none_callee(double %x) nounwind noinline
250250
; CHECK-COMMON-NEXT: mov x9, sp
251251
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
252252
; CHECK-COMMON-NEXT: mov sp, x9
253-
; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
254-
; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
255-
; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
256-
; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
253+
; CHECK-COMMON-NEXT: stp x9, x8, [x29, #-16]
257254
; CHECK-COMMON-NEXT: sub x8, x29, #16
258255
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8
259256
; CHECK-COMMON-NEXT: bl normal_callee
@@ -292,12 +289,9 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_inout_za" nounwind {
292289
; CHECK-COMMON-NEXT: mov x9, sp
293290
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
294291
; CHECK-COMMON-NEXT: mov sp, x9
295-
; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
296-
; CHECK-COMMON-NEXT: sub x9, x29, #16
297-
; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
298-
; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
299-
; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
300-
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x9
292+
; CHECK-COMMON-NEXT: sub x10, x29, #16
293+
; CHECK-COMMON-NEXT: stp x9, x8, [x29, #-16]
294+
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x10
301295
; CHECK-COMMON-NEXT: bl __addtf3
302296
; CHECK-COMMON-NEXT: smstart za
303297
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
@@ -356,12 +350,9 @@ define double @frem_call_za(double %a, double %b) "aarch64_inout_za" nounwind {
356350
; CHECK-COMMON-NEXT: mov x9, sp
357351
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
358352
; CHECK-COMMON-NEXT: mov sp, x9
359-
; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
360-
; CHECK-COMMON-NEXT: sub x9, x29, #16
361-
; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
362-
; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
363-
; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
364-
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x9
353+
; CHECK-COMMON-NEXT: sub x10, x29, #16
354+
; CHECK-COMMON-NEXT: stp x9, x8, [x29, #-16]
355+
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x10
365356
; CHECK-COMMON-NEXT: bl fmod
366357
; CHECK-COMMON-NEXT: smstart za
367358
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0

0 commit comments

Comments
 (0)