Skip to content

Commit 2b07f34

Browse files
authored
[AMDGPU] Ensure non-reserved CSR spilled regs are live-in (llvm#146427) (llvm#3432)
2 parents 94b2eae + 7efab10 commit 2b07f34

File tree

2 files changed

+110
-0
lines changed

2 files changed

+110
-0
lines changed

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1160,6 +1160,7 @@ void SIFrameLowering::emitCSRSpillStores(MachineFunction &MF,
11601160
const SIInstrInfo *TII = ST.getInstrInfo();
11611161
const SIRegisterInfo &TRI = TII->getRegisterInfo();
11621162
const MCRegisterInfo *MCRI = MF.getContext().getRegisterInfo();
1163+
MachineRegisterInfo &MRI = MF.getRegInfo();
11631164

11641165
// Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
11651166
// registers. However, save all lanes of callee-saved VGPRs. Due to this, we
@@ -1188,6 +1189,12 @@ void SIFrameLowering::emitCSRSpillStores(MachineFunction &MF,
11881189
}
11891190
};
11901191

1192+
for (const Register Reg : make_first_range(WWMScratchRegs)) {
1193+
if (!MRI.isReserved(Reg)) {
1194+
MRI.addLiveIn(Reg);
1195+
MBB.addLiveIn(Reg);
1196+
}
1197+
}
11911198
StoreWWMRegisters(WWMScratchRegs);
11921199
if (!WWMCalleeSavedRegs.empty()) {
11931200
if (ScratchExecCopy) {
Lines changed: 103 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,103 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -run-pass=si-lower-sgpr-spills,greedy,si-lower-wwm-copies,virtregrewriter,prologepilog -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
3+
4+
---
5+
name: widget
6+
tracksRegLiveness: true
7+
frameInfo:
8+
adjustsStack: true
9+
stack:
10+
- { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
11+
- { id: 1, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
12+
machineFunctionInfo:
13+
hasSpilledSGPRs: true
14+
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
15+
stackPtrOffsetReg: '$sgpr32'
16+
body: |
17+
; GCN-LABEL: name: widget
18+
; GCN: bb.0:
19+
; GCN-NEXT: successors: %bb.1(0x80000000)
20+
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $agpr0
21+
; GCN-NEXT: {{ $}}
22+
; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6
23+
; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32
24+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0
25+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14
26+
; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
27+
; GCN-NEXT: $vgpr63 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
28+
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
29+
; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $agpr0, 0
30+
; GCN-NEXT: $exec = S_MOV_B64 -1
31+
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
32+
; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr62, 256
33+
; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
34+
; GCN-NEXT: renamable $vgpr62 = IMPLICIT_DEF
35+
; GCN-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR $sgpr15, 0, killed $vgpr62
36+
; GCN-NEXT: $noreg = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
37+
; GCN-NEXT: renamable $agpr0 = COPY killed renamable $vgpr62
38+
; GCN-NEXT: $exec = S_MOV_B64 killed $noreg
39+
; GCN-NEXT: renamable $vgpr62 = IMPLICIT_DEF
40+
; GCN-NEXT: dead renamable $vgpr62 = V_AND_B32_e32 1, killed $vgpr62, implicit $exec
41+
; GCN-NEXT: {{ $}}
42+
; GCN-NEXT: bb.1:
43+
; GCN-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
44+
; GCN-NEXT: liveins: $agpr0
45+
; GCN-NEXT: {{ $}}
46+
; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
47+
; GCN-NEXT: S_BRANCH %bb.3
48+
; GCN-NEXT: {{ $}}
49+
; GCN-NEXT: bb.2:
50+
; GCN-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000)
51+
; GCN-NEXT: liveins: $agpr0, $sgpr86, $sgpr87, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr80_sgpr81, $sgpr82_sgpr83, $sgpr84_sgpr85, $sgpr96_sgpr97, $sgpr98_sgpr99
52+
; GCN-NEXT: {{ $}}
53+
; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
54+
; GCN-NEXT: S_BRANCH %bb.4
55+
; GCN-NEXT: {{ $}}
56+
; GCN-NEXT: bb.3:
57+
; GCN-NEXT: successors: %bb.2(0x80000000)
58+
; GCN-NEXT: liveins: $agpr0
59+
; GCN-NEXT: {{ $}}
60+
; GCN-NEXT: $noreg = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
61+
; GCN-NEXT: renamable $vgpr62 = COPY renamable $agpr0
62+
; GCN-NEXT: $exec = S_MOV_B64 killed $noreg
63+
; GCN-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR killed $vgpr62, 1
64+
; GCN-NEXT: S_BRANCH %bb.2
65+
; GCN-NEXT: {{ $}}
66+
; GCN-NEXT: bb.4:
67+
; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
68+
; GCN-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
69+
; GCN-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec
70+
; GCN-NEXT: $exec = S_MOV_B64 -1
71+
; GCN-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
72+
; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
73+
; GCN-NEXT: SI_RETURN
74+
bb.0:
75+
liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15
76+
77+
%45:vgpr_32 = IMPLICIT_DEF
78+
SI_SPILL_S32_SAVE $sgpr15, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5)
79+
%16:vgpr_32 = V_AND_B32_e32 1, %45, implicit $exec
80+
81+
bb.1:
82+
successors: %bb.3, %bb.2
83+
84+
S_CBRANCH_EXECZ %bb.2, implicit $exec
85+
S_BRANCH %bb.3
86+
87+
bb.2:
88+
successors: %bb.4(0x04000000), %bb.1(0x7c000000)
89+
liveins: $sgpr86, $sgpr87, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr80_sgpr81, $sgpr82_sgpr83, $sgpr84_sgpr85, $sgpr96_sgpr97, $sgpr98_sgpr99
90+
91+
S_CBRANCH_EXECNZ %bb.1, implicit $exec
92+
S_BRANCH %bb.4
93+
94+
bb.3:
95+
ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
96+
$sgpr14 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, addrspace 5)
97+
ADJCALLSTACKDOWN 0, 28, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
98+
S_BRANCH %bb.2
99+
100+
bb.4:
101+
SI_RETURN
102+
103+
...

0 commit comments

Comments
 (0)