Skip to content

Commit ec07142

Browse files
committed
WIP attempt to avoid MCRegAliasIterator
1 parent b72549b commit ec07142

14 files changed

+4456
-3222
lines changed

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 30 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -2245,25 +2245,43 @@ bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
22452245
return true;
22462246
}
22472247

2248-
static bool isLiveIntoMBB(MCRegister Reg, MachineBasicBlock &MBB,
2249-
const TargetRegisterInfo *TRI) {
2250-
for (MCRegAliasIterator R(Reg, TRI, true); R.isValid(); ++R) {
2251-
if (MBB.isLiveIn(*R)) {
2252-
return true;
2253-
}
2254-
}
2255-
return false;
2256-
}
2257-
22582248
bool SIFrameLowering::spillCalleeSavedRegisters(
22592249
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
22602250
ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
22612251
MachineFunction *MF = MBB.getParent();
22622252
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
22632253
const SIInstrInfo *TII = ST.getInstrInfo();
22642254
const SIRegisterInfo *SITRI = static_cast<const SIRegisterInfo *>(TRI);
2255+
const MachineRegisterInfo &MRI = MF->getRegInfo();
22652256

22662257
if (!ST.useVGPRBlockOpsForCSR()) {
2258+
SparseBitVector<> LiveInRoots;
2259+
if (MRI.tracksLiveness()) {
2260+
for (const auto &LI : MBB.liveins()) {
2261+
for (MCRegUnitMaskIterator MI(LI.PhysReg, TRI); MI.isValid(); ++MI) {
2262+
auto [Unit, UnitLaneMask] = *MI;
2263+
if ((LI.LaneMask & UnitLaneMask).none())
2264+
continue;
2265+
for (MCRegUnitRootIterator RI(Unit, TRI); RI.isValid(); ++RI)
2266+
LiveInRoots.set(*RI);
2267+
}
2268+
}
2269+
}
2270+
2271+
auto UpdateLiveInCheckCanKill = [&](MCRegister Reg) {
2272+
if (!MRI.tracksLiveness())
2273+
return false;
2274+
for (MCRegUnitIterator UI(Reg, TRI); UI.isValid(); ++UI) {
2275+
for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) {
2276+
if (LiveInRoots.test(*RI))
2277+
return false;
2278+
}
2279+
}
2280+
// Reg is live in to the spill
2281+
MBB.addLiveIn(Reg);
2282+
return true;
2283+
};
2284+
22672285
for (const CalleeSavedInfo &CS : CSI) {
22682286
// Insert the spill to the stack frame.
22692287
unsigned Reg = CS.getReg();
@@ -2279,9 +2297,8 @@ bool SIFrameLowering::spillCalleeSavedRegisters(
22792297
// the incoming register value, so don't kill at the spill point. This
22802298
// happens since we pass some special inputs (workgroup IDs) in the
22812299
// callee saved range.
2282-
const bool IsLiveIn = isLiveIntoMBB(Reg, MBB, TRI);
2283-
TII->storeRegToStackSlotCFI(MBB, MI, Reg, !IsLiveIn, CS.getFrameIdx(),
2284-
RC);
2300+
TII->storeRegToStackSlotCFI(MBB, MI, Reg, UpdateLiveInCheckCanKill(Reg),
2301+
CS.getFrameIdx(), RC);
22852302
}
22862303
}
22872304
return true;

llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir

Lines changed: 12 additions & 12 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll

Lines changed: 2607 additions & 1453 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll

Lines changed: 273 additions & 269 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll

Lines changed: 141 additions & 137 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll

Lines changed: 233 additions & 226 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll

Lines changed: 368 additions & 368 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll

Lines changed: 497 additions & 497 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/call-args-inreg-bfloat.ll

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -18,13 +18,13 @@ define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 {
1818
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1919
; GFX9-NEXT: s_mov_b64 exec, s[18:19]
2020
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
21-
; GFX9-NEXT: s_addk_i32 s32, 0x400
2221
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
22+
; GFX9-NEXT: s_addk_i32 s32, 0x400
23+
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2324
; GFX9-NEXT: s_getpc_b64 s[18:19]
2425
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_bf16_inreg@rel32@lo+4
2526
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_bf16_inreg@rel32@hi+12
2627
; GFX9-NEXT: s_mov_b32 s0, s16
27-
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2828
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
2929
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
3030
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
@@ -48,12 +48,12 @@ define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 {
4848
; GFX11-NEXT: v_writelane_b32 v40, s1, 2
4949
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
5050
; GFX11-NEXT: s_add_i32 s32, s32, 16
51+
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
5152
; GFX11-NEXT: s_getpc_b64 s[2:3]
5253
; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_bf16_inreg@rel32@lo+4
5354
; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_bf16_inreg@rel32@hi+12
54-
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
55+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
5556
; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
56-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
5757
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
5858
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
5959
; GFX11-NEXT: s_mov_b32 s32, s33
@@ -78,13 +78,13 @@ define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg)
7878
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7979
; GFX9-NEXT: s_mov_b64 exec, s[18:19]
8080
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
81-
; GFX9-NEXT: s_addk_i32 s32, 0x400
8281
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
82+
; GFX9-NEXT: s_addk_i32 s32, 0x400
83+
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
8384
; GFX9-NEXT: s_getpc_b64 s[18:19]
8485
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2bf16_inreg@rel32@lo+4
8586
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2bf16_inreg@rel32@hi+12
8687
; GFX9-NEXT: s_mov_b32 s0, s16
87-
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
8888
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
8989
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
9090
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
@@ -108,12 +108,12 @@ define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg)
108108
; GFX11-NEXT: v_writelane_b32 v40, s1, 2
109109
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
110110
; GFX11-NEXT: s_add_i32 s32, s32, 16
111+
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
111112
; GFX11-NEXT: s_getpc_b64 s[2:3]
112113
; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2bf16_inreg@rel32@lo+4
113114
; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2bf16_inreg@rel32@hi+12
114-
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
115+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
115116
; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
116-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
117117
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
118118
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
119119
; GFX11-NEXT: s_mov_b32 s32, s33

0 commit comments

Comments
 (0)