Skip to content

Commit b0e31c3

Browse files
committed
Merge branch 'amd-debug' into amd-staging
Also restores "Support block load/store for CSR"
2 parents b21e586 + fb604a3 commit b0e31c3

File tree

10 files changed

+997
-208
lines changed

10 files changed

+997
-208
lines changed

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 48 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -2181,17 +2181,50 @@ bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
21812181
return true;
21822182
}
21832183

2184+
static bool isLiveIntoMBB(MCRegister Reg, MachineBasicBlock &MBB,
2185+
const TargetRegisterInfo *TRI) {
2186+
for (MCRegAliasIterator R(Reg, TRI, true); R.isValid(); ++R) {
2187+
if (MBB.isLiveIn(*R)) {
2188+
return true;
2189+
}
2190+
}
2191+
return false;
2192+
}
2193+
21842194
bool SIFrameLowering::spillCalleeSavedRegisters(
21852195
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
21862196
ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
21872197
MachineFunction *MF = MBB.getParent();
21882198
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
2189-
if (!ST.useVGPRBlockOpsForCSR())
2190-
return false;
2199+
const SIInstrInfo *TII = ST.getInstrInfo();
2200+
const SIRegisterInfo *SITRI = static_cast<const SIRegisterInfo *>(TRI);
2201+
2202+
if (!ST.useVGPRBlockOpsForCSR()) {
2203+
for (const CalleeSavedInfo &CS : CSI) {
2204+
// Insert the spill to the stack frame.
2205+
unsigned Reg = CS.getReg();
2206+
2207+
if (CS.isSpilledToReg()) {
2208+
BuildMI(MBB, MI, DebugLoc(), TII->get(TargetOpcode::COPY),
2209+
CS.getDstReg())
2210+
.addReg(Reg, getKillRegState(true));
2211+
} else {
2212+
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
2213+
Reg, Reg == SITRI->getReturnAddressReg(*MF) ? MVT::i64 : MVT::i32);
2214+
// If this value was already livein, we probably have a direct use of
2215+
// the incoming register value, so don't kill at the spill point. This
2216+
// happens since we pass some special inputs (workgroup IDs) in the
2217+
// callee saved range.
2218+
const bool IsLiveIn = isLiveIntoMBB(Reg, MBB, TRI);
2219+
TII->storeRegToStackSlotCFI(MBB, MI, Reg, !IsLiveIn, CS.getFrameIdx(),
2220+
RC, TRI);
2221+
}
2222+
}
2223+
return true;
2224+
}
21912225

21922226
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
21932227
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
2194-
const SIInstrInfo *TII = ST.getInstrInfo();
21952228
SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
21962229

21972230
const TargetRegisterClass *BlockRegClass =
@@ -2215,7 +2248,7 @@ bool SIFrameLowering::spillCalleeSavedRegisters(
22152248
FrameInfo.getObjectAlign(FrameIndex));
22162249

22172250
BuildMI(MBB, MI, MI->getDebugLoc(),
2218-
TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE))
2251+
TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE))
22192252
.addReg(Reg, getKillRegState(false))
22202253
.addFrameIndex(FrameIndex)
22212254
.addReg(MFI->getStackPtrOffsetReg())
@@ -2392,48 +2425,6 @@ bool SIFrameLowering::requiresStackPointerReference(
23922425
return frameTriviallyRequiresSP(MFI);
23932426
}
23942427

2395-
static bool isLiveIntoMBB(MCRegister Reg, MachineBasicBlock &MBB,
2396-
const TargetRegisterInfo *TRI) {
2397-
for (MCRegAliasIterator R(Reg, TRI, true); R.isValid(); ++R) {
2398-
if (MBB.isLiveIn(*R)) {
2399-
return true;
2400-
}
2401-
}
2402-
return false;
2403-
}
2404-
2405-
bool SIFrameLowering::spillCalleeSavedRegisters(
2406-
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
2407-
const ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2408-
MachineFunction &MF = *MBB.getParent();
2409-
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2410-
const SIRegisterInfo *RI = ST.getRegisterInfo();
2411-
const SIInstrInfo *TII = ST.getInstrInfo();
2412-
2413-
for (const CalleeSavedInfo &CS : CSI) {
2414-
// Insert the spill to the stack frame.
2415-
unsigned Reg = CS.getReg();
2416-
2417-
if (CS.isSpilledToReg()) {
2418-
BuildMI(MBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
2419-
CS.getDstReg())
2420-
.addReg(Reg, getKillRegState(true));
2421-
} else {
2422-
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
2423-
Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
2424-
// If this value was already livein, we probably have a direct use of the
2425-
// incoming register value, so don't kill at the spill point. This happens
2426-
// since we pass some special inputs (workgroup IDs) in the callee saved
2427-
// range.
2428-
const bool IsLiveIn = isLiveIntoMBB(Reg, MBB, TRI);
2429-
TII->storeRegToStackSlotCFI(MBB, MBBI, Reg, !IsLiveIn, CS.getFrameIdx(),
2430-
RC, TRI);
2431-
}
2432-
}
2433-
2434-
return true;
2435-
}
2436-
24372428
MachineInstr *SIFrameLowering::buildCFI(MachineBasicBlock &MBB,
24382429
MachineBasicBlock::iterator MBBI,
24392430
const DebugLoc &DL,
@@ -2558,3 +2549,14 @@ MachineInstr *SIFrameLowering::buildCFIForRegToSGPRPairSpill(
25582549
nullptr, DwarfReg, DwarfSGPR0, SGPRBitSize, DwarfSGPR1, SGPRBitSize);
25592550
return buildCFI(MBB, MBBI, DL, std::move(CFIInst));
25602551
}
2552+
2553+
MachineInstr *
2554+
SIFrameLowering::buildCFIForSameValue(MachineBasicBlock &MBB,
2555+
MachineBasicBlock::iterator MBBI,
2556+
const DebugLoc &DL, Register Reg) const {
2557+
const MachineFunction &MF = *MBB.getParent();
2558+
const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo();
2559+
int DwarfReg = MCRI.getDwarfRegNum(Reg, /*isEH=*/false);
2560+
auto CFIInst = MCCFIInstruction::createSameValue(nullptr, DwarfReg);
2561+
return buildCFI(MBB, MBBI, DL, std::move(CFIInst));
2562+
}

llvm/lib/Target/AMDGPU/SIFrameLowering.h

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -84,11 +84,6 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
8484
MachineBasicBlock &MBB,
8585
MachineBasicBlock::iterator MI) const override;
8686

87-
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
88-
MachineBasicBlock::iterator MBBI,
89-
const ArrayRef<CalleeSavedInfo> CSI,
90-
const TargetRegisterInfo *TRI) const override;
91-
9287
protected:
9388
bool hasFPImpl(const MachineFunction &MF) const override;
9489

@@ -167,6 +162,9 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
167162
MachineBasicBlock::iterator MBBI,
168163
const DebugLoc &DL, Register Reg,
169164
Register SGPRPair) const;
165+
MachineInstr *buildCFIForSameValue(MachineBasicBlock &MBB,
166+
MachineBasicBlock::iterator MBBI,
167+
const DebugLoc &DL, Register Reg) const;
170168
// Returns true if the function may need to reserve space on the stack for the
171169
// CWSR trap handler.
172170
bool mayReserveScratchForCWSR(const MachineFunction &MF) const;

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -683,6 +683,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
683683
static bool isBlockLoadStore(uint16_t Opcode) {
684684
switch (Opcode) {
685685
case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE:
686+
case AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE:
686687
case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE:
687688
case AMDGPU::SCRATCH_STORE_BLOCK_SADDR:
688689
case AMDGPU::SCRATCH_LOAD_BLOCK_SADDR:

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1054,8 +1054,10 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class,
10541054

10551055
def _CFI_SAVE : VPseudoInstSI <
10561056
(outs),
1057-
(ins vgpr_class:$vdata, i32imm:$vaddr,
1058-
SReg_32:$soffset, i32imm:$offset)> {
1057+
!con(
1058+
(ins vgpr_class:$vdata, i32imm:$vaddr,
1059+
SReg_32:$soffset, i32imm:$offset),
1060+
!if(HasMask, (ins SReg_32:$mask), (ins)))> {
10591061
let mayStore = 1;
10601062
let mayLoad = 0;
10611063
// (2 * 4) + (8 * num_subregs) bytes maximum

llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp

Lines changed: 6 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -114,36 +114,13 @@ static void insertCSRSaves(const GCNSubtarget &ST, MachineBasicBlock &SaveBlock,
114114
assert(Success && "spillCalleeSavedRegisters should always succeed");
115115
(void)Success;
116116

117-
MachineInstrSpan MIS(I, &SaveBlock);
118-
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
119-
Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
120-
121-
// If this value was already livein, we probably have a direct use of the
122-
// incoming register value, so don't kill at the spill point. This happens
123-
// since we pass some special inputs (workgroup IDs) in the callee saved
124-
// range.
125-
const bool IsLiveIn = isLiveIntoMBB(Reg, SaveBlock, TRI);
126-
TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(),
127-
RC, TRI, Register());
117+
// TFI doesn't update Indexes and LIS, so we have to do it separately.
118+
if (Indexes)
119+
Indexes->repairIndexesInRange(&SaveBlock, SaveBlock.begin(), I);
128120

129-
if (Indexes) {
130-
assert(std::distance(MIS.begin(), I) == 1);
131-
MachineInstr &Inst = *std::prev(I);
132-
Indexes->insertMachineInstrInMaps(Inst);
133-
}
134-
135-
if (LIS)
136-
LIS->removeAllRegUnitsForPhysReg(Reg);
137-
}
138-
} else {
139-
// TFI doesn't update Indexes and LIS, so we have to do it separately.
140-
if (Indexes)
141-
Indexes->repairIndexesInRange(&SaveBlock, SaveBlock.begin(), I);
142-
143-
if (LIS)
144-
for (const CalleeSavedInfo &CS : CSI)
145-
LIS->removeAllRegUnitsForPhysReg(CS.getReg());
146-
}
121+
if (LIS)
122+
for (const CalleeSavedInfo &CS : CSI)
123+
LIS->removeAllRegUnitsForPhysReg(CS.getReg());
147124
}
148125

149126
/// Insert restore code for the callee-saved registers used in the function.

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 51 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1202,6 +1202,7 @@ static unsigned getNumSubRegsForSpillOp(const MachineInstr &MI,
12021202
unsigned Op = MI.getOpcode();
12031203
switch (Op) {
12041204
case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE:
1205+
case AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE:
12051206
case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE:
12061207
// FIXME: This assumes the mask is statically known and not computed at
12071208
// runtime. However, some ABIs may want to compute the mask dynamically and
@@ -1986,11 +1987,17 @@ void SIRegisterInfo::buildSpillLoadStore(
19861987
MIB.addImm(0); // swz
19871988
MIB.addMemOperand(NewMMO);
19881989

1989-
if (IsStore && NeedsCFI)
1990-
TFL->buildCFIForVGPRToVMEMSpill(MBB, MI, DebugLoc(), SubReg,
1991-
(Offset + RegOffset) *
1992-
ST.getWavefrontSize() +
1993-
AdditionalCFIOffset);
1990+
if (IsStore && NeedsCFI) {
1991+
if (TII->isBlockLoadStore(LoadStoreOp)) {
1992+
assert(RegOffset == 0 &&
1993+
"expected whole register block to be treated as single element");
1994+
buildCFIForBlockCSRStore(MBB, MI, ValueReg, Offset);
1995+
} else {
1996+
TFL->buildCFIForVGPRToVMEMSpill(
1997+
MBB, MI, DebugLoc(), SubReg,
1998+
(Offset + RegOffset) * ST.getWavefrontSize() + AdditionalCFIOffset);
1999+
}
2000+
}
19942001

19952002
if (!IsAGPR && NeedSuperRegDef)
19962003
MIB.addReg(ValueReg, RegState::ImplicitDefine);
@@ -2061,6 +2068,31 @@ void SIRegisterInfo::addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB,
20612068
MIB.addUse(BaseVGPR + RegOffset, RegState::Implicit);
20622069
}
20632070

2071+
void SIRegisterInfo::buildCFIForBlockCSRStore(MachineBasicBlock &MBB,
2072+
MachineBasicBlock::iterator MBBI,
2073+
Register BlockReg,
2074+
int64_t Offset) const {
2075+
const MachineFunction *MF = MBB.getParent();
2076+
const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
2077+
uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(BlockReg);
2078+
Register BaseVGPR = getSubReg(BlockReg, AMDGPU::sub0);
2079+
for (unsigned RegOffset = 0; RegOffset < 32; ++RegOffset) {
2080+
Register VGPR = BaseVGPR + RegOffset;
2081+
if (Mask & (1 << RegOffset)) {
2082+
assert(isCalleeSavedPhysReg(VGPR, *MF));
2083+
ST.getFrameLowering()->buildCFIForVGPRToVMEMSpill(
2084+
MBB, MBBI, DebugLoc(), VGPR,
2085+
(Offset + RegOffset) * ST.getWavefrontSize());
2086+
} else if (isCalleeSavedPhysReg(VGPR, *MF)) {
2087+
// FIXME: This is a workaround for the fact that FrameLowering's
2088+
// emitPrologueEntryCFI considers the block load to clobber all registers
2089+
// in the block.
2090+
ST.getFrameLowering()->buildCFIForSameValue(MBB, MBBI, DebugLoc(),
2091+
BaseVGPR + RegOffset);
2092+
}
2093+
}
2094+
}
2095+
20642096
void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
20652097
int Offset, bool IsLoad,
20662098
bool IsKill) const {
@@ -2538,6 +2570,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
25382570
}
25392571

25402572
// VGPR register spill
2573+
case AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE:
25412574
case AMDGPU::SI_SPILL_V1024_CFI_SAVE:
25422575
case AMDGPU::SI_SPILL_V512_CFI_SAVE:
25432576
case AMDGPU::SI_SPILL_V256_CFI_SAVE:
@@ -2570,13 +2603,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
25702603
case AMDGPU::SI_SPILL_AV32_CFI_SAVE:
25712604
NeedsCFI = true;
25722605
LLVM_FALLTHROUGH;
2573-
case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE: {
2574-
// Put mask into M0.
2575-
BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32),
2576-
AMDGPU::M0)
2577-
.add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask));
2578-
LLVM_FALLTHROUGH;
2579-
}
2606+
case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE:
25802607
case AMDGPU::SI_SPILL_V1024_SAVE:
25812608
case AMDGPU::SI_SPILL_V512_SAVE:
25822609
case AMDGPU::SI_SPILL_V384_SAVE:
@@ -2622,6 +2649,16 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
26222649
case AMDGPU::SI_SPILL_AV32_SAVE:
26232650
case AMDGPU::SI_SPILL_WWM_V32_SAVE:
26242651
case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
2652+
assert(
2653+
MI->getOpcode() != AMDGPU::SI_BLOCK_SPILL_V1024_SAVE &&
2654+
"block spill does not currenty support spilling non-CSR registers");
2655+
2656+
if (MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE)
2657+
// Put mask into M0.
2658+
BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32),
2659+
AMDGPU::M0)
2660+
.add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask));
2661+
26252662
const MachineOperand *VData = TII->getNamedOperand(*MI,
26262663
AMDGPU::OpName::vdata);
26272664
if (VData->isUndef()) {
@@ -2637,7 +2674,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
26372674
assert(ST.enableFlatScratch() && "Flat Scratch is not enabled!");
26382675
Opc = AMDGPU::SCRATCH_STORE_SHORT_SADDR_t16;
26392676
} else {
2640-
Opc = MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_SAVE
2677+
Opc = MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE
26412678
? AMDGPU::SCRATCH_STORE_BLOCK_SADDR
26422679
: ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
26432680
: AMDGPU::BUFFER_STORE_DWORD_OFFSET;
@@ -2652,7 +2689,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
26522689
buildSpillLoadStore(
26532690
*MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
26542691
TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
2655-
*MI->memoperands_begin(), RS);
2692+
*MI->memoperands_begin(), RS, nullptr, NeedsCFI);
26562693
MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(*MI, TII));
26572694
if (IsWWMRegSpill)
26582695
TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy());

llvm/lib/Target/AMDGPU/SIRegisterInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,13 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
128128
void addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB,
129129
Register BlockReg) const;
130130

131+
// Iterate over all VGPRs in the given BlockReg and emit CFI for each VGPR
132+
// as-needed depending on the (statically known) mask, relative to the given
133+
// base Offset.
134+
void buildCFIForBlockCSRStore(MachineBasicBlock &MBB,
135+
MachineBasicBlock::iterator MBBI,
136+
Register BlockReg, int64_t Offset) const;
137+
131138
const TargetRegisterClass *
132139
getLargestLegalSuperClass(const TargetRegisterClass *RC,
133140
const MachineFunction &MF) const override;

0 commit comments

Comments
 (0)