Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/include/llvm/CodeGen/MachineFrameInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ class CalleeSavedInfo {
MCRegister getReg() const { return Reg; }
int getFrameIdx() const { return FrameIdx; }
MCRegister getDstReg() const { return DstReg; }
/// Overwrite the register recorded for this entry with \p R. Only Reg is
/// assigned; no other member is touched by this setter.
void setReg(MCRegister R) { Reg = R; }
void setFrameIdx(int FI) {
FrameIdx = FI;
SpilledToReg = false;
Expand Down
17 changes: 17 additions & 0 deletions llvm/include/llvm/CodeGen/TargetFrameLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,14 @@ class TargetFrameLowering {
return false;
}

/// spillCalleeSavedRegister - Default implementation for spilling a single
/// callee saved register.
///
/// If \p CS is marked as spilled to a register, a COPY from CS.getReg() into
/// CS.getDstReg() is built at \p MI in \p SaveBlock. Otherwise the register
/// is stored to the frame index CS.getFrameIdx() through
/// TargetInstrInfo::storeRegToStackSlot(), using the minimal physical
/// register class that \p TRI reports for it.
void spillCalleeSavedRegister(MachineBasicBlock &SaveBlock,
MachineBasicBlock::iterator MI,
const CalleeSavedInfo &CS,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) const;

/// restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee
/// saved registers and returns true if it isn't possible / profitable to do
/// so by issuing a series of load instructions via loadRegToStackSlot().
Expand All @@ -284,6 +292,15 @@ class TargetFrameLowering {
return false;
}

/// restoreCalleeSavedRegister - Default implementation for restoring a single
/// callee saved register. Should be called in reverse order. Can insert
/// multiple instructions.
///
/// If \p CS is marked as spilled to a register, a COPY from CS.getDstReg()
/// back into CS.getReg() is built at \p MI in \p MBB. Otherwise the register
/// is reloaded from the frame index CS.getFrameIdx() through
/// TargetInstrInfo::loadRegFromStackSlot().
void restoreCalleeSavedRegister(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const CalleeSavedInfo &CS,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) const;

/// hasFP - Return true if the specified function should have a dedicated
/// frame pointer register. For most targets this is true only if the function
/// has variable sized allocas or if frame pointer elimination is disabled.
Expand Down
35 changes: 6 additions & 29 deletions llvm/lib/CodeGen/PrologEpilogInserter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -476,8 +476,8 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
// Now that we know which registers need to be saved and restored, allocate
// stack slots for them.
for (auto &CS : CSI) {
// If the target has spilled this register to another register, we don't
// need to allocate a stack slot.
// If the target has spilled this register to another register or already
// handled it, we don't need to allocate a stack slot.
if (CS.isSpilledToReg())
continue;

Expand Down Expand Up @@ -597,25 +597,14 @@ static void updateLiveness(MachineFunction &MF) {
static void insertCSRSaves(MachineBasicBlock &SaveBlock,
ArrayRef<CalleeSavedInfo> CSI) {
MachineFunction &MF = *SaveBlock.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();

MachineBasicBlock::iterator I = SaveBlock.begin();
if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
for (const CalleeSavedInfo &CS : CSI) {
// Insert the spill to the stack frame.
MCRegister Reg = CS.getReg();

if (CS.isSpilledToReg()) {
BuildMI(SaveBlock, I, DebugLoc(), TII.get(TargetOpcode::COPY),
CS.getDstReg())
.addReg(Reg, getKillRegState(true));
} else {
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
TRI, Register());
}
TFI->spillCalleeSavedRegister(SaveBlock, I, CS, TII, TRI);
}
}
}
Expand All @@ -624,7 +613,7 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock,
static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
std::vector<CalleeSavedInfo> &CSI) {
MachineFunction &MF = *RestoreBlock.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();

Expand All @@ -634,19 +623,7 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock,

if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
for (const CalleeSavedInfo &CI : reverse(CSI)) {
MCRegister Reg = CI.getReg();
if (CI.isSpilledToReg()) {
BuildMI(RestoreBlock, I, DebugLoc(), TII.get(TargetOpcode::COPY), Reg)
.addReg(CI.getDstReg(), getKillRegState(true));
} else {
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC,
TRI, Register());
assert(I != RestoreBlock.begin() &&
"loadRegFromStackSlot didn't insert any code!");
// Insert in reverse order. loadRegFromStackSlot can insert
// multiple instructions.
}
TFI->restoreCalleeSavedRegister(RestoreBlock, I, CI, TII, TRI);
}
}
}
Expand Down
35 changes: 35 additions & 0 deletions llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
Expand Down Expand Up @@ -182,3 +183,37 @@ TargetFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
return DwarfFrameBase{DwarfFrameBase::Register, {RI->getFrameRegister(MF).id()}};
}

/// Default save of one callee-saved register described by \p CS, emitted at
/// \p MI in \p SaveBlock: either a store to the entry's frame index or, when
/// the entry is spilled to a register, a COPY into its destination register.
void TargetFrameLowering::spillCalleeSavedRegister(
    MachineBasicBlock &SaveBlock, MachineBasicBlock::iterator MI,
    const CalleeSavedInfo &CS, const TargetInstrInfo *TII,
    const TargetRegisterInfo *TRI) const {
  const MCRegister SavedReg = CS.getReg();

  // Stack-slot case: store the register to the frame index recorded in CS.
  if (!CS.isSpilledToReg()) {
    const TargetRegisterClass *RegClass =
        TRI->getMinimalPhysRegClass(SavedReg);
    TII->storeRegToStackSlot(SaveBlock, MI, SavedReg, true, CS.getFrameIdx(),
                             RegClass, TRI, Register());
    return;
  }

  // Register-to-register case: copy the value into the destination register,
  // marking the source as killed.
  BuildMI(SaveBlock, MI, DebugLoc(), TII->get(TargetOpcode::COPY),
          CS.getDstReg())
      .addReg(SavedReg, getKillRegState(true));
}

/// Default restore of one callee-saved register described by \p CS, emitted
/// at \p MI in \p MBB: either a reload from the entry's frame index or, when
/// the entry is spilled to a register, a COPY back from its destination
/// register.
void TargetFrameLowering::restoreCalleeSavedRegister(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const CalleeSavedInfo &CS, const TargetInstrInfo *TII,
    const TargetRegisterInfo *TRI) const {
  const MCRegister RestoredReg = CS.getReg();

  // Stack-slot case: reload the register from the frame index recorded in CS.
  if (!CS.isSpilledToReg()) {
    const TargetRegisterClass *RegClass =
        TRI->getMinimalPhysRegClass(RestoredReg);
    TII->loadRegFromStackSlot(MBB, MI, RestoredReg, CS.getFrameIdx(), RegClass,
                              TRI, Register());
    // Callers insert in reverse order, and loadRegFromStackSlot can insert
    // multiple instructions; it must have emitted at least one ahead of MI.
    assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!");
    return;
  }

  // Register-to-register case: copy the saved value back, marking the
  // temporary holding register as killed.
  BuildMI(MBB, MI, DebugLoc(), TII->get(TargetOpcode::COPY), RestoredReg)
      .addReg(CS.getDstReg(), getKillRegState(true));
}
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1275,6 +1275,14 @@ def FeatureDynamicVGPRBlockSize32 : SubtargetFeature<"dynamic-vgpr-block-size-32
"Use a block size of 32 for dynamic VGPR allocation (default is 16)"
>;

// Enable the use of SCRATCH_STORE/LOAD_BLOCK instructions for saving and
// restoring the callee-saved registers.
// Selected with the "block-vgpr-csr" feature string; when present it sets the
// subtarget field UseBlockVGPROpsForCSR to true.
def FeatureUseBlockVGPROpsForCSR : SubtargetFeature<"block-vgpr-csr",
"UseBlockVGPROpsForCSR",
"true",
"Use block load/store for VGPR callee saved registers"
>;

def FeatureLshlAddU64Inst
: SubtargetFeature<"lshl-add-u64-inst", "HasLshlAddU64Inst", "true",
"Has v_lshl_add_u64 instruction">;
Expand Down
37 changes: 37 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Constants.h"
Expand Down Expand Up @@ -243,6 +244,36 @@ const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV,
return AsmPrinter::lowerConstant(CV, BaseCV, Offset);
}

/// Emit a raw assembly comment listing the registers that the block
/// load/store \p MI may transfer.
///
/// The register block is read from the instruction's vdst operand (for loads)
/// or vdata operand (for stores). The per-block mask is looked up in \p MFI;
/// a zero mask means there is nothing to report and no comment is emitted.
/// Bit I of the mask selects the I-th register counted from the block's sub0
/// register.
static void emitVGPRBlockComment(const MachineInstr *MI, const SIInstrInfo *TII,
                                 const TargetRegisterInfo *TRI,
                                 const SIMachineFunctionInfo *MFI,
                                 MCStreamer &OS) {
  // The instruction will only transfer a subset of the registers in the block,
  // based on the mask that is stored in m0. We could search for the instruction
  // that sets m0, but most of the time we'll already have the mask stored in
  // the machine function info. Try to use that. This assumes that we only use
  // block loads/stores for CSR spills.
  Register RegBlock =
      TII->getNamedOperand(*MI, MI->mayLoad() ? AMDGPU::OpName::vdst
                                              : AMDGPU::OpName::vdata)
          ->getReg();
  Register FirstRegInBlock = TRI->getSubReg(RegBlock, AMDGPU::sub0);
  uint32_t Mask = MFI->getMaskForVGPRBlockOps(RegBlock);

  if (!Mask)
    return; // Nothing to report

  SmallString<512> TransferredRegs;
  for (unsigned I = 0; I < sizeof(Mask) * 8; ++I) {
    // Shift an unsigned one: with a plain int literal, I == 31 would shift
    // into the sign bit of a signed int before the value is compared against
    // the unsigned mask.
    if (Mask & (uint32_t(1) << I)) {
      (llvm::Twine(" ") + TRI->getRegAsmName(FirstRegInBlock + I))
          .toVector(TransferredRegs);
    }
  }

  OS.emitRawComment(" transferring at most " + TransferredRegs);
}

void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
// FIXME: Enable feature predicate checks once all the test pass.
// AMDGPU_MC::verifyInstructionPredicates(MI->getOpcode(),
Expand Down Expand Up @@ -331,6 +362,12 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
return;
}

if (isVerbose())
if (STI.getInstrInfo()->isBlockLoadStore(MI->getOpcode()))
emitVGPRBlockComment(MI, STI.getInstrInfo(), STI.getRegisterInfo(),
MF->getInfo<SIMachineFunctionInfo>(),
*OutStreamer);

MCInst TmpInst;
MCInstLowering.lower(MI, TmpInst);
EmitToStreamer(*OutStreamer, TmpInst);
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasPointSampleAccel = false;

bool RequiresCOV6 = false;
bool UseBlockVGPROpsForCSR = false;

// Dummy feature to use for assembler in tablegen.
bool FeatureDisable = false;
Expand Down Expand Up @@ -1277,6 +1278,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,

bool requiresCodeObjectV6() const { return RequiresCOV6; }

/// \returns the value of the UseBlockVGPROpsForCSR subtarget flag.
bool useVGPRBlockOpsForCSR() const { return UseBlockVGPROpsForCSR; }

bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }

bool hasVALUReadSGPRHazard() const { return getGeneration() == GFX12; }
Expand Down
Loading
Loading