Skip to content

Commit fbf514f

Browse files
committed
[AArch64] Prepare for split ZPR and PPR area allocation (NFCI)
This patch refactors AArch64FrameLowering so that the sizes of the ZPR and PPR areas can be calculated separately. A subsequent patch will use this to support allocating ZPRs and PPRs to separate areas. This patch is intended to be NFC (no functional change) and is split out to make the later functional changes easier to spot.
1 parent d4a5ccc commit fbf514f

File tree

8 files changed

+296
-174
lines changed

8 files changed

+296
-174
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 154 additions & 108 deletions
Large diffs are not rendered by default.

llvm/lib/Target/AArch64/AArch64FrameLowering.h

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@ class TargetLowering;
2323
class AArch64FunctionInfo;
2424
class AArch64PrologueEmitter;
2525

26+
struct SVEStackSizes {
27+
uint64_t ZPRStackSize{0};
28+
uint64_t PPRStackSize{0};
29+
};
30+
2631
class AArch64FrameLowering : public TargetFrameLowering {
2732
public:
2833
explicit AArch64FrameLowering()
@@ -147,7 +152,16 @@ class AArch64FrameLowering : public TargetFrameLowering {
147152

148153
bool requiresSaveVG(const MachineFunction &MF) const;
149154

150-
StackOffset getSVEStackSize(const MachineFunction &MF) const;
155+
/// Returns the size of the entire ZPR stackframe (calleesaves + spills).
156+
StackOffset getZPRStackSize(const MachineFunction &MF) const;
157+
158+
/// Returns the size of the entire PPR stackframe (calleesaves + spills).
159+
StackOffset getPPRStackSize(const MachineFunction &MF) const;
160+
161+
/// Returns the size of the entire SVE stackframe (PPRs + ZPRs).
162+
StackOffset getSVEStackSize(const MachineFunction &MF) const {
163+
return getZPRStackSize(MF) + getPPRStackSize(MF);
164+
}
151165

152166
protected:
153167
bool hasFPImpl(const MachineFunction &MF) const override;
@@ -166,10 +180,6 @@ class AArch64FrameLowering : public TargetFrameLowering {
166180
bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
167181
uint64_t StackBumpBytes) const;
168182

169-
int64_t estimateSVEStackObjectOffsets(MachineFrameInfo &MF) const;
170-
int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF,
171-
int &MinCSFrameIndex,
172-
int &MaxCSFrameIndex) const;
173183
bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
174184
uint64_t StackBumpBytes) const;
175185
void emitCalleeSavedGPRRestores(MachineBasicBlock &MBB,
@@ -181,6 +191,7 @@ class AArch64FrameLowering : public TargetFrameLowering {
181191
int64_t RealignmentPadding, StackOffset AllocSize,
182192
bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
183193
StackOffset InitialOffset, bool FollowupAllocs) const;
194+
184195
/// Make a determination whether a Hazard slot is used and create it if
185196
/// needed.
186197
void determineStackHazardSlot(MachineFunction &MF,
@@ -248,7 +259,19 @@ class AArch64FrameLowering : public TargetFrameLowering {
248259
bool NeedsWinCFI,
249260
bool *HasWinCFI) const;
250261

251-
bool isSVECalleeSave(MachineBasicBlock::iterator I) const;
262+
// Convenience function to determine whether I is part of the ZPR callee
263+
// saves.
264+
bool isPartOfZPRCalleeSaves(MachineBasicBlock::iterator I) const;
265+
266+
// Convenience function to determine whether I is part of the PPR callee
267+
// saves.
268+
bool isPartOfPPRCalleeSaves(MachineBasicBlock::iterator I) const;
269+
270+
// Convenience function to determine whether I is part of the SVE callee
271+
// saves.
272+
bool isPartOfSVECalleeSaves(MachineBasicBlock::iterator I) const {
273+
return isPartOfZPRCalleeSaves(I) || isPartOfPPRCalleeSaves(I);
274+
}
252275

253276
/// Returns the size of the fixed object area (allocated next to sp on entry)
254277
/// On Win64 this may include a var args area and an UnwindHelp object for EH.

llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,21 @@
2323

2424
using namespace llvm;
2525

26+
static std::optional<uint64_t>
27+
getSVEStackSize(const AArch64FunctionInfo &MFI,
28+
uint64_t (AArch64FunctionInfo::*GetStackSize)() const) {
29+
if (!MFI.hasCalculatedStackSizeSVE())
30+
return std::nullopt;
31+
return (MFI.*GetStackSize)();
32+
}
33+
2634
yaml::AArch64FunctionInfo::AArch64FunctionInfo(
2735
const llvm::AArch64FunctionInfo &MFI)
2836
: HasRedZone(MFI.hasRedZone()),
29-
StackSizeSVE(MFI.hasCalculatedStackSizeSVE()
30-
? std::optional<uint64_t>(MFI.getStackSizeSVE())
31-
: std::nullopt) {}
37+
StackSizeZPR(
38+
getSVEStackSize(MFI, &llvm::AArch64FunctionInfo::getStackSizeZPR)),
39+
StackSizePPR(
40+
getSVEStackSize(MFI, &llvm::AArch64FunctionInfo::getStackSizePPR)) {}
3241

3342
void yaml::AArch64FunctionInfo::mappingImpl(yaml::IO &YamlIO) {
3443
MappingTraits<AArch64FunctionInfo>::mapping(YamlIO, *this);
@@ -38,8 +47,9 @@ void AArch64FunctionInfo::initializeBaseYamlFields(
3847
const yaml::AArch64FunctionInfo &YamlMFI) {
3948
if (YamlMFI.HasRedZone)
4049
HasRedZone = YamlMFI.HasRedZone;
41-
if (YamlMFI.StackSizeSVE)
42-
setStackSizeSVE(*YamlMFI.StackSizeSVE);
50+
if (YamlMFI.StackSizeZPR || YamlMFI.StackSizePPR)
51+
setStackSizeSVE(YamlMFI.StackSizeZPR.value_or(0),
52+
YamlMFI.StackSizePPR.value_or(0));
4353
}
4454

4555
static std::pair<bool, bool> GetSignReturnAddress(const Function &F) {

llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h

Lines changed: 37 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -74,13 +74,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
7474
/// Amount of stack frame size, not including callee-saved registers.
7575
uint64_t LocalStackSize = 0;
7676

77-
/// The start and end frame indices for the SVE callee saves.
78-
int MinSVECSFrameIndex = 0;
79-
int MaxSVECSFrameIndex = 0;
80-
8177
/// Amount of stack frame size used for saving callee-saved registers.
8278
unsigned CalleeSavedStackSize = 0;
83-
unsigned SVECalleeSavedStackSize = 0;
79+
unsigned ZPRCalleeSavedStackSize = 0;
80+
unsigned PPRCalleeSavedStackSize = 0;
8481
bool HasCalleeSavedStackSize = false;
8582
bool HasSVECalleeSavedStackSize = false;
8683

@@ -137,9 +134,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
137134
/// SVE stack size (for predicates and data vectors) are maintained here
138135
/// rather than in FrameInfo, as the placement and Stack IDs are target
139136
/// specific.
140-
uint64_t StackSizeSVE = 0;
137+
uint64_t StackSizeZPR = 0;
138+
uint64_t StackSizePPR = 0;
141139

142-
/// HasCalculatedStackSizeSVE indicates whether StackSizeSVE is valid.
140+
/// HasCalculatedStackSizeSVE indicates whether StackSizeZPR and StackSizePPR are valid.
143141
bool HasCalculatedStackSizeSVE = false;
144142

145143
/// Has a value when it is known whether or not the function uses a
@@ -300,16 +298,25 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
300298
TailCallReservedStack = bytes;
301299
}
302300

303-
bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; }
304-
305-
void setStackSizeSVE(uint64_t S) {
301+
void setStackSizeSVE(uint64_t ZPR, uint64_t PPR) {
302+
StackSizeZPR = ZPR;
303+
StackSizePPR = PPR;
306304
HasCalculatedStackSizeSVE = true;
307-
StackSizeSVE = S;
308305
}
309306

310-
uint64_t getStackSizeSVE() const {
307+
uint64_t getStackSizeZPR() const {
311308
assert(hasCalculatedStackSizeSVE());
312-
return StackSizeSVE;
309+
return StackSizeZPR;
310+
}
311+
uint64_t getStackSizePPR() const {
312+
assert(hasCalculatedStackSizeSVE());
313+
return StackSizePPR;
314+
}
315+
316+
bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; }
317+
318+
bool hasSVEStackSize() const {
319+
return getStackSizeZPR() > 0 || getStackSizePPR() > 0;
313320
}
314321

315322
bool hasStackFrame() const { return HasStackFrame; }
@@ -402,23 +409,25 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
402409
}
403410

404411
// Saves the CalleeSavedStackSize for SVE vectors in 'scalable bytes'
405-
void setSVECalleeSavedStackSize(unsigned Size) {
406-
SVECalleeSavedStackSize = Size;
412+
void setSVECalleeSavedStackSize(unsigned ZPR, unsigned PPR) {
413+
ZPRCalleeSavedStackSize = ZPR;
414+
PPRCalleeSavedStackSize = PPR;
407415
HasSVECalleeSavedStackSize = true;
408416
}
409-
unsigned getSVECalleeSavedStackSize() const {
417+
unsigned getZPRCalleeSavedStackSize() const {
410418
assert(HasSVECalleeSavedStackSize &&
411-
"SVECalleeSavedStackSize has not been calculated");
412-
return SVECalleeSavedStackSize;
419+
"ZPRCalleeSavedStackSize has not been calculated");
420+
return ZPRCalleeSavedStackSize;
413421
}
414-
415-
void setMinMaxSVECSFrameIndex(int Min, int Max) {
416-
MinSVECSFrameIndex = Min;
417-
MaxSVECSFrameIndex = Max;
422+
unsigned getPPRCalleeSavedStackSize() const {
423+
assert(HasSVECalleeSavedStackSize &&
424+
"PPRCalleeSavedStackSize has not been calculated");
425+
return PPRCalleeSavedStackSize;
418426
}
419427

420-
int getMinSVECSFrameIndex() const { return MinSVECSFrameIndex; }
421-
int getMaxSVECSFrameIndex() const { return MaxSVECSFrameIndex; }
428+
unsigned getSVECalleeSavedStackSize() const {
429+
return getZPRCalleeSavedStackSize() + getPPRCalleeSavedStackSize();
430+
}
422431

423432
void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamicTLSAccesses; }
424433
unsigned getNumLocalDynamicTLSAccesses() const {
@@ -599,7 +608,8 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
599608
namespace yaml {
600609
struct AArch64FunctionInfo final : public yaml::MachineFunctionInfo {
601610
std::optional<bool> HasRedZone;
602-
std::optional<uint64_t> StackSizeSVE;
611+
std::optional<uint64_t> StackSizeZPR;
612+
std::optional<uint64_t> StackSizePPR;
603613

604614
AArch64FunctionInfo() = default;
605615
AArch64FunctionInfo(const llvm::AArch64FunctionInfo &MFI);
@@ -611,7 +621,8 @@ struct AArch64FunctionInfo final : public yaml::MachineFunctionInfo {
611621
template <> struct MappingTraits<AArch64FunctionInfo> {
612622
static void mapping(IO &YamlIO, AArch64FunctionInfo &MFI) {
613623
YamlIO.mapOptional("hasRedZone", MFI.HasRedZone);
614-
YamlIO.mapOptional("stackSizeSVE", MFI.StackSizeSVE);
624+
YamlIO.mapOptional("stackSizeZPR", MFI.StackSizeZPR);
625+
YamlIO.mapOptional("stackSizePPR", MFI.StackSizePPR);
615626
}
616627
};
617628

llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp

Lines changed: 55 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ void AArch64PrologueEmitter::emitPrologue() {
206206

207207
// Now allocate space for the GPR callee saves.
208208
MachineBasicBlock::iterator MBBI = PrologueBeginI;
209-
while (MBBI != EndI && AFL.isSVECalleeSave(MBBI))
209+
while (MBBI != EndI && AFL.isPartOfSVECalleeSaves(MBBI))
210210
++MBBI;
211211
FirstGPRSaveI = AFL.convertCalleeSaveRestoreToSPPrePostIncDec(
212212
MBB, MBBI, DL, TII, -AFI->getCalleeSavedStackSize(), NeedsWinCFI,
@@ -238,7 +238,7 @@ void AArch64PrologueEmitter::emitPrologue() {
238238
MachineBasicBlock::iterator AfterGPRSavesI = FirstGPRSaveI;
239239
while (AfterGPRSavesI != EndI &&
240240
AfterGPRSavesI->getFlag(MachineInstr::FrameSetup) &&
241-
!AFL.isSVECalleeSave(AfterGPRSavesI)) {
241+
!AFL.isPartOfSVECalleeSaves(AfterGPRSavesI)) {
242242
if (CombineSPBump &&
243243
// Only fix-up frame-setup load/store instructions.
244244
(!AFL.requiresSaveVG(MF) || !AFL.isVGInstruction(AfterGPRSavesI, TLI)))
@@ -269,38 +269,66 @@ void AArch64PrologueEmitter::emitPrologue() {
269269
if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
270270
emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
271271

272-
StackOffset SVEStackSize = AFL.getSVEStackSize(MF);
273-
StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
274272
MachineBasicBlock::iterator CalleeSavesEnd = AfterGPRSavesI;
275273

274+
StackOffset PPRCalleeSavesSize =
275+
StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
276+
StackOffset ZPRCalleeSavesSize =
277+
StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
278+
StackOffset SVECalleeSavesSize = PPRCalleeSavesSize + ZPRCalleeSavesSize;
279+
StackOffset PPRLocalsSize = AFL.getPPRStackSize(MF) - PPRCalleeSavesSize;
280+
StackOffset ZPRLocalsSize = AFL.getZPRStackSize(MF) - ZPRCalleeSavesSize;
281+
276282
StackOffset CFAOffset =
277283
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
278-
279-
// Process the SVE callee-saves to determine what space needs to be
280-
// allocated.
281284
MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
282-
if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
283-
LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
284-
<< "\n");
285-
SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
286-
SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
287-
// Find callee save instructions in frame.
288-
// Note: With FPAfterSVECalleeSaves the callee saves have already been
285+
286+
if (!FPAfterSVECalleeSaves) {
287+
MachineBasicBlock::iterator ZPRCalleeSavesBegin = AfterGPRSavesI,
288+
ZPRCalleeSavesEnd = AfterGPRSavesI;
289+
MachineBasicBlock::iterator PPRCalleeSavesBegin = AfterGPRSavesI,
290+
PPRCalleeSavesEnd = AfterGPRSavesI;
291+
292+
// Process the SVE callee-saves to determine what space needs to be
289293
// allocated.
290-
if (!FPAfterSVECalleeSaves) {
291-
MachineBasicBlock::iterator CalleeSavesBegin = AfterGPRSavesI;
292-
assert(AFL.isSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
293-
while (AFL.isSVECalleeSave(AfterSVESavesI) &&
294+
295+
if (PPRCalleeSavesSize) {
296+
LLVM_DEBUG(dbgs() << "PPRCalleeSavedStackSize = "
297+
<< PPRCalleeSavesSize.getScalable() << "\n");
298+
299+
PPRCalleeSavesBegin = AfterSVESavesI;
300+
assert(AFL.isPartOfPPRCalleeSaves(PPRCalleeSavesBegin) &&
301+
"Unexpected instruction");
302+
while (AFL.isPartOfPPRCalleeSaves(AfterSVESavesI) &&
294303
AfterSVESavesI != MBB.getFirstTerminator())
295304
++AfterSVESavesI;
296-
CalleeSavesEnd = AfterSVESavesI;
305+
PPRCalleeSavesEnd = AfterSVESavesI;
306+
}
297307

298-
StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
299-
// Allocate space for the callee saves (if any).
300-
AFL.allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize,
301-
false, nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
302-
MFI.hasVarSizedObjects() || LocalsSize);
308+
if (ZPRCalleeSavesSize) {
309+
LLVM_DEBUG(dbgs() << "ZPRCalleeSavedStackSize = "
310+
<< ZPRCalleeSavesSize.getScalable() << "\n");
311+
ZPRCalleeSavesBegin = AfterSVESavesI;
312+
assert(AFL.isPartOfZPRCalleeSaves(ZPRCalleeSavesBegin) &&
313+
"Unexpected instruction");
314+
while (AFL.isPartOfZPRCalleeSaves(AfterSVESavesI) &&
315+
AfterSVESavesI != MBB.getFirstTerminator())
316+
++AfterSVESavesI;
317+
ZPRCalleeSavesEnd = AfterSVESavesI;
303318
}
319+
320+
// Allocate space for the callee saves (if any).
321+
StackOffset LocalsSize =
322+
PPRLocalsSize + ZPRLocalsSize + StackOffset::getFixed(NumBytes);
323+
MachineBasicBlock::iterator CalleeSavesBegin =
324+
AFI->getPPRCalleeSavedStackSize() ? PPRCalleeSavesBegin
325+
: ZPRCalleeSavesBegin;
326+
AFL.allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false,
327+
nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
328+
MFI.hasVarSizedObjects() || LocalsSize);
329+
330+
CalleeSavesEnd = AFI->getZPRCalleeSavedStackSize() ? ZPRCalleeSavesEnd
331+
: PPRCalleeSavesEnd;
304332
}
305333
CFAOffset += SVECalleeSavesSize;
306334

@@ -315,6 +343,7 @@ void AArch64PrologueEmitter::emitPrologue() {
315343
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
316344
// the correct value here, as NumBytes also includes padding bytes,
317345
// which shouldn't be counted here.
346+
StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize;
318347
AFL.allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
319348
SVELocalsSize + StackOffset::getFixed(NumBytes),
320349
NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
@@ -365,7 +394,8 @@ void AArch64PrologueEmitter::emitPrologue() {
365394
emitDefineCFAWithFP(AfterSVESavesI, FixedObject);
366395
} else {
367396
StackOffset TotalSize =
368-
SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
397+
AFL.getSVEStackSize(MF) +
398+
StackOffset::getFixed((int64_t)MFI.getStackSize());
369399
CFIInstBuilder CFIBuilder(MBB, AfterSVESavesI, MachineInstr::FrameSetup);
370400
CFIBuilder.insertCFIInst(
371401
createDefCFA(RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,

llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -644,7 +644,7 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
644644
if (ST.hasSVE() || ST.isStreaming()) {
645645
// Frames that have variable sized objects and scalable SVE objects,
646646
// should always use a basepointer.
647-
if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeSVE())
647+
if (!AFI->hasCalculatedStackSizeSVE() || AFI->hasSVEStackSize())
648648
return true;
649649
}
650650

@@ -784,7 +784,7 @@ AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
784784
assert((!MF.getSubtarget<AArch64Subtarget>().hasSVE() ||
785785
AFI->hasCalculatedStackSizeSVE()) &&
786786
"Expected SVE area to be calculated by this point");
787-
return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeSVE() &&
787+
return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->hasSVEStackSize() &&
788788
!AFI->hasStackHazardSlotIndex();
789789
}
790790

llvm/test/DebugInfo/AArch64/asan-stack-vars.mir

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,8 @@ frameInfo:
366366
maxCallFrameSize: 0
367367
localFrameSize: 144
368368
machineFunctionInfo:
369-
stackSizeSVE: 0
369+
stackSizeZPR: 0
370+
stackSizePPR: 0
370371
stack:
371372
- { id: 0, name: StackGuardSlot, offset: -40, size: 8, alignment: 8,
372373
stack-id: default, local-offset: -8 }

llvm/test/DebugInfo/AArch64/compiler-gen-bbs-livedebugvalues.mir

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ frameInfo:
6969
hasCalls: true
7070
maxCallFrameSize: 0
7171
machineFunctionInfo:
72-
stackSizeSVE: 0
72+
stackSizeZPR: 0
73+
stackSizePPR: 0
7374
stack:
7475
- { id: 0, type: spill-slot, offset: -20, size: 4, alignment: 4, stack-id: default }
7576
- { id: 1, type: spill-slot, offset: -8, size: 8, alignment: 8, stack-id: default,

0 commit comments

Comments
 (0)