Skip to content

Commit dcdf354

Browse files
committed
[AArch64][SME] Reuse ZT0 spill slot
Previously, we allocated a new spill slot each time we needed to spill ZT0, which grew the stack frame with every spill. Saving the spill slot in FuncInfo will also allow us to reload the spill on entry to exception handlers.
1 parent 24a01d6 commit dcdf354

File tree

4 files changed

+36
-20
lines changed

4 files changed

+36
-20
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8023,6 +8023,17 @@ static bool isPassedInFPR(EVT VT) {
80238023
(VT.isFloatingPoint() && !VT.isScalableVector());
80248024
}
80258025

8026+
// Returns a frame-index node for the function's ZT0 spill slot.
//
// The slot is a 64-byte, 16-byte-aligned spill stack object. It is created on
// first use and its index is recorded in FuncInfo, so every later call hands
// back the same slot instead of allocating a new stack object per spill.
//
// \param MFI      Frame info used to create the spill stack object.
// \param FuncInfo Per-function state that caches the slot's frame index.
// \param DAG      SelectionDAG used to build the frame-index node.
static SDValue getZT0FrameIndex(MachineFrameInfo &MFI,
8027+
AArch64FunctionInfo &FuncInfo,
8028+
SelectionDAG &DAG) {
8029+
// Allocate lazily: the slot only exists once something asks for it.
if (!FuncInfo.hasZT0SpillSlotIndex())
8030+
FuncInfo.setZT0SpillSlotIndex(MFI.CreateSpillStackObject(64, Align(16)));
8031+
8032+
// Wrap the cached index in a node of the target's frame-index type.
return DAG.getFrameIndex(
8033+
FuncInfo.getZT0SpillSlotIndex(),
8034+
DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
8035+
}
8036+
80268037
SDValue AArch64TargetLowering::lowerEHPadEntry(SDValue Chain, SDLoc const &DL,
80278038
SelectionDAG &DAG) const {
80288039
assert(Chain.getOpcode() == ISD::EntryToken && "Unexpected Chain value");
@@ -9427,10 +9438,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
94279438
// If the caller has ZT0 state which will not be preserved by the callee,
94289439
// spill ZT0 before the call.
94299440
if (ShouldPreserveZT0) {
9430-
unsigned ZTObj = MFI.CreateSpillStackObject(64, Align(16));
9431-
ZTFrameIdx = DAG.getFrameIndex(
9432-
ZTObj,
9433-
DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
9441+
ZTFrameIdx = getZT0FrameIndex(MFI, *FuncInfo, DAG);
94349442

94359443
Chain = DAG.getNode(AArch64ISD::SAVE_ZT, DL, DAG.getVTList(MVT::Other),
94369444
{Chain, DAG.getConstant(0, DL, MVT::i32), ZTFrameIdx});

llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,9 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
239239
// support).
240240
Register EarlyAllocSMESaveBuffer = AArch64::NoRegister;
241241

242+
// Frame index of the spill slot for ZT0. std::numeric_limits<int>::max()
// is the sentinel meaning no slot has been assigned yet.
243+
int ZT0SpillSlotIndex = std::numeric_limits<int>::max();
244+
242245
// Note: The following properties are only used for the old SME ABI lowering:
243246
/// The frame-index for the TPIDR2 object used for lazy saves.
244247
TPIDR2Object TPIDR2;
@@ -265,6 +268,15 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
265268
return EarlyAllocSMESaveBuffer;
266269
}
267270

271+
/// Records \p FI as the frame index of the ZT0 spill slot.
void setZT0SpillSlotIndex(int FI) { ZT0SpillSlotIndex = FI; }
272+
/// Returns the recorded ZT0 spill slot frame index. Asserts that one has
/// been set; check hasZT0SpillSlotIndex() first if that is not guaranteed.
int getZT0SpillSlotIndex() const {
273+
assert(hasZT0SpillSlotIndex() && "ZT0 spill slot index not set!");
274+
return ZT0SpillSlotIndex;
275+
}
276+
/// Returns true once a ZT0 spill slot index has been recorded, i.e. the
/// stored value differs from the std::numeric_limits<int>::max() sentinel.
bool hasZT0SpillSlotIndex() const {
277+
return ZT0SpillSlotIndex != std::numeric_limits<int>::max();
278+
}
279+
268280
// Old SME ABI lowering state getters/setters:
269281
Register getSMESaveBufferAddr() const { return SMESaveBufferAddr; };
270282
void setSMESaveBufferAddr(Register Reg) { SMESaveBufferAddr = Reg; };

llvm/test/CodeGen/AArch64/sme-peephole-opts.ll

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -224,22 +224,21 @@ define float @test6(float %f) nounwind "aarch64_pstate_sm_enabled" {
224224
define void @test7() nounwind "aarch64_inout_zt0" {
225225
; CHECK-LABEL: test7:
226226
; CHECK: // %bb.0:
227-
; CHECK-NEXT: sub sp, sp, #144
228-
; CHECK-NEXT: stp x30, x19, [sp, #128] // 16-byte Folded Spill
229-
; CHECK-NEXT: add x19, sp, #64
227+
; CHECK-NEXT: sub sp, sp, #80
228+
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
229+
; CHECK-NEXT: mov x19, sp
230230
; CHECK-NEXT: str zt0, [x19]
231231
; CHECK-NEXT: smstop za
232232
; CHECK-NEXT: bl callee
233233
; CHECK-NEXT: smstart za
234234
; CHECK-NEXT: ldr zt0, [x19]
235-
; CHECK-NEXT: mov x19, sp
236235
; CHECK-NEXT: str zt0, [x19]
237236
; CHECK-NEXT: smstop za
238237
; CHECK-NEXT: bl callee
239238
; CHECK-NEXT: smstart za
240239
; CHECK-NEXT: ldr zt0, [x19]
241-
; CHECK-NEXT: ldp x30, x19, [sp, #128] // 16-byte Folded Reload
242-
; CHECK-NEXT: add sp, sp, #144
240+
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
241+
; CHECK-NEXT: add sp, sp, #80
243242
; CHECK-NEXT: ret
244243
call void @callee()
245244
call void @callee()

llvm/test/CodeGen/AArch64/sme-zt0-state.ll

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -391,37 +391,34 @@ define void @shared_za_new_zt0(ptr %callee) "aarch64_inout_za" "aarch64_new_zt0"
391391
define void @zt0_multiple_private_za_calls(ptr %callee) "aarch64_in_zt0" nounwind {
392392
; CHECK-COMMON-LABEL: zt0_multiple_private_za_calls:
393393
; CHECK-COMMON: // %bb.0:
394-
; CHECK-COMMON-NEXT: sub sp, sp, #288
395-
; CHECK-COMMON-NEXT: stp x20, x19, [sp, #272] // 16-byte Folded Spill
396-
; CHECK-COMMON-NEXT: add x20, sp, #192
394+
; CHECK-COMMON-NEXT: sub sp, sp, #96
395+
; CHECK-COMMON-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
396+
; CHECK-COMMON-NEXT: mov x20, sp
397397
; CHECK-COMMON-NEXT: mov x19, x0
398-
; CHECK-COMMON-NEXT: stp x29, x30, [sp, #256] // 16-byte Folded Spill
398+
; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
399399
; CHECK-COMMON-NEXT: str zt0, [x20]
400400
; CHECK-COMMON-NEXT: smstop za
401401
; CHECK-COMMON-NEXT: blr x0
402402
; CHECK-COMMON-NEXT: smstart za
403403
; CHECK-COMMON-NEXT: ldr zt0, [x20]
404-
; CHECK-COMMON-NEXT: add x20, sp, #128
405404
; CHECK-COMMON-NEXT: str zt0, [x20]
406405
; CHECK-COMMON-NEXT: smstop za
407406
; CHECK-COMMON-NEXT: blr x19
408407
; CHECK-COMMON-NEXT: smstart za
409408
; CHECK-COMMON-NEXT: ldr zt0, [x20]
410-
; CHECK-COMMON-NEXT: add x20, sp, #64
411409
; CHECK-COMMON-NEXT: str zt0, [x20]
412410
; CHECK-COMMON-NEXT: smstop za
413411
; CHECK-COMMON-NEXT: blr x19
414412
; CHECK-COMMON-NEXT: smstart za
415413
; CHECK-COMMON-NEXT: ldr zt0, [x20]
416-
; CHECK-COMMON-NEXT: mov x20, sp
417414
; CHECK-COMMON-NEXT: str zt0, [x20]
418415
; CHECK-COMMON-NEXT: smstop za
419416
; CHECK-COMMON-NEXT: blr x19
420417
; CHECK-COMMON-NEXT: smstart za
421418
; CHECK-COMMON-NEXT: ldr zt0, [x20]
422-
; CHECK-COMMON-NEXT: ldp x20, x19, [sp, #272] // 16-byte Folded Reload
423-
; CHECK-COMMON-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload
424-
; CHECK-COMMON-NEXT: add sp, sp, #288
419+
; CHECK-COMMON-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
420+
; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
421+
; CHECK-COMMON-NEXT: add sp, sp, #96
425422
; CHECK-COMMON-NEXT: ret
426423
call void %callee()
427424
call void %callee()

0 commit comments

Comments
 (0)