Skip to content

Commit 738c73a

Browse files
committed
RegAllocFast: Make self loop live-out heuristic more aggressive
This currently has no impact on generated code, but it prevents sizeable code-size regressions after D52010 by avoiding spilling and reloading every value inside blocks that loop back to themselves. Also add a baseline test that would regress without this patch.
1 parent e47d292 commit 738c73a

File tree

2 files changed

+218
-4
lines changed

2 files changed

+218
-4
lines changed

llvm/lib/CodeGen/RegAllocFast.cpp

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -263,18 +263,37 @@ int RegAllocFast::getStackSpaceFor(Register VirtReg) {
263263
return FrameIdx;
264264
}
265265

266+
/// Returns true if position \p A comes no later than position \p B inside
/// \p MBB, i.e. a definition at \p A is visible at \p B.
///
/// \p B == MBB.end() is treated as "after every instruction", so any \p A
/// dominates it. Both iterators are assumed to point into \p MBB (or be its
/// end iterator).
///
/// NOTE(review): this is a linear scan from the top of the block, O(block
/// size) per query — acceptable for the fast register allocator's use here.
static bool dominates(MachineBasicBlock &MBB,
                      MachineBasicBlock::const_iterator A,
                      MachineBasicBlock::const_iterator B) {
  // The end iterator is ordered after everything else in the block.
  if (B == MBB.end())
    return true;

  // Walk forward from the first instruction; whichever of A or B is reached
  // first is the earlier (dominating) position. If A == B this stops at A and
  // reports domination, making the relation reflexive.
  MachineBasicBlock::const_iterator I = MBB.begin();
  while (&*I != A && &*I != B)
    ++I;

  return &*I == A;
}
279+
266280
/// Returns false if \p VirtReg is known to not live out of the current block.
267281
bool RegAllocFast::mayLiveOut(Register VirtReg) {
268282
if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) {
269283
// Cannot be live-out if there are no successors.
270284
return !MBB->succ_empty();
271285
}
272286

273-
// If this block loops back to itself, it would be necessary to check whether
274-
// the use comes after the def.
287+
const MachineInstr *SelfLoopDef = nullptr;
288+
289+
// If this block loops back to itself, it is necessary to check whether the
290+
// use comes after the def.
275291
if (MBB->isSuccessor(MBB)) {
276-
MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
277-
return true;
292+
SelfLoopDef = MRI->getUniqueVRegDef(VirtReg);
293+
if (!SelfLoopDef) {
294+
MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
295+
return true;
296+
}
278297
}
279298

280299
// See if the first \p Limit uses of the register are all in the current
@@ -287,6 +306,16 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) {
287306
// Cannot be live-out if there are no successors.
288307
return !MBB->succ_empty();
289308
}
309+
310+
if (SelfLoopDef) {
311+
// Try to handle some simple cases to avoid spilling and reloading every
312+
// value inside a self looping block.
313+
if (SelfLoopDef == &UseInst ||
314+
!dominates(*MBB, SelfLoopDef->getIterator(), UseInst.getIterator())) {
315+
MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
316+
return true;
317+
}
318+
}
290319
}
291320

292321
return false;
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=regallocfast -o - %s | FileCheck -check-prefix=GCN %s
3+
4+
---
5+
name: self_loop_single_def_use
6+
tracksRegLiveness: true
7+
machineFunctionInfo:
8+
isEntryFunction: true
9+
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
10+
stackPtrOffsetReg: '$sgpr32'
11+
body: |
12+
; GCN-LABEL: name: self_loop_single_def_use
13+
; GCN: bb.0:
14+
; GCN: successors: %bb.1(0x80000000)
15+
; GCN: liveins: $vgpr0_vgpr1
16+
; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5)
17+
; GCN: bb.1:
18+
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
19+
; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
20+
; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec
21+
; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec
22+
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
23+
; GCN: bb.2:
24+
; GCN: S_ENDPGM 0
25+
bb.0:
26+
liveins: $vgpr0_vgpr1
27+
%0:vreg_64 = COPY $vgpr0_vgpr1
28+
29+
bb.1:
30+
%1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec
31+
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec
32+
S_CBRANCH_EXECZ %bb.1, implicit $exec
33+
34+
bb.2:
35+
S_ENDPGM 0
36+
37+
...
38+
39+
---
40+
name: self_loop_multi_def
41+
tracksRegLiveness: true
42+
machineFunctionInfo:
43+
isEntryFunction: true
44+
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
45+
stackPtrOffsetReg: '$sgpr32'
46+
body: |
47+
; GCN-LABEL: name: self_loop_multi_def
48+
; GCN: bb.0:
49+
; GCN: successors: %bb.1(0x80000000)
50+
; GCN: liveins: $vgpr0_vgpr1
51+
; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5)
52+
; GCN: bb.1:
53+
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
54+
; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
55+
; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec
56+
; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec
57+
; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec
58+
; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec
59+
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
60+
; GCN: bb.2:
61+
; GCN: S_ENDPGM 0
62+
bb.0:
63+
liveins: $vgpr0_vgpr1
64+
%0:vreg_64 = COPY $vgpr0_vgpr1
65+
66+
bb.1:
67+
%1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec
68+
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec
69+
%1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec
70+
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec
71+
S_CBRANCH_EXECZ %bb.1, implicit $exec
72+
73+
bb.2:
74+
S_ENDPGM 0
75+
76+
...
77+
78+
# There's a single def inside the self loop, but it's also a use.
79+
80+
---
81+
name: self_loop_def_use_same_inst
82+
tracksRegLiveness: true
83+
machineFunctionInfo:
84+
isEntryFunction: true
85+
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
86+
stackPtrOffsetReg: '$sgpr32'
87+
body: |
88+
; GCN-LABEL: name: self_loop_def_use_same_inst
89+
; GCN: bb.0:
90+
; GCN: successors: %bb.1(0x80000000)
91+
; GCN: liveins: $vgpr0_vgpr1
92+
; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5)
93+
; GCN: bb.1:
94+
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
95+
; GCN: renamable $vgpr0 = V_ADD_U32_e32 1, undef $vgpr0, implicit $exec
96+
; GCN: $vgpr1_vgpr2 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
97+
; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, 0, 0, implicit $exec
98+
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
99+
; GCN: bb.2:
100+
; GCN: S_ENDPGM 0
101+
bb.0:
102+
liveins: $vgpr0_vgpr1
103+
%0:vreg_64 = COPY $vgpr0_vgpr1
104+
105+
bb.1:
106+
%1:vgpr_32 = V_ADD_U32_e32 1, undef %1, implicit $exec
107+
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec
108+
S_CBRANCH_EXECZ %bb.1, implicit $exec
109+
110+
bb.2:
111+
S_ENDPGM 0
112+
113+
...
114+
115+
---
116+
name: self_loop_def_after_use
117+
tracksRegLiveness: true
118+
machineFunctionInfo:
119+
isEntryFunction: true
120+
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
121+
stackPtrOffsetReg: '$sgpr32'
122+
body: |
123+
; GCN-LABEL: name: self_loop_def_after_use
124+
; GCN: bb.0:
125+
; GCN: successors: %bb.1(0x80000000)
126+
; GCN: liveins: $vgpr0_vgpr1
127+
; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5)
128+
; GCN: bb.1:
129+
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
130+
; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
131+
; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, undef renamable $vgpr0, 0, 0, 0, 0, implicit $exec
132+
; GCN: renamable $vgpr2 = V_ADD_U32_e64 1, 1, 0, implicit $exec
133+
; GCN: SI_SPILL_V32_SAVE killed $vgpr2, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
134+
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
135+
; GCN: bb.2:
136+
; GCN: S_ENDPGM 0
137+
bb.0:
138+
liveins: $vgpr0_vgpr1
139+
%0:vreg_64 = COPY $vgpr0_vgpr1
140+
141+
bb.1:
142+
GLOBAL_STORE_DWORD %0, undef %1, 0, 0, 0, 0, implicit $exec
143+
%1:vgpr_32 = V_ADD_U32_e64 1, 1, 0, implicit $exec
144+
S_CBRANCH_EXECZ %bb.1, implicit $exec
145+
146+
bb.2:
147+
S_ENDPGM 0
148+
149+
...
150+
151+
---
152+
name: self_loop_single_subreg_def_use
153+
tracksRegLiveness: true
154+
machineFunctionInfo:
155+
isEntryFunction: true
156+
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
157+
stackPtrOffsetReg: '$sgpr32'
158+
body: |
159+
; GCN-LABEL: name: self_loop_single_subreg_def_use
160+
; GCN: bb.0:
161+
; GCN: successors: %bb.1(0x80000000)
162+
; GCN: liveins: $vgpr0_vgpr1
163+
; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5)
164+
; GCN: bb.1:
165+
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
166+
; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
167+
; GCN: undef renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr2_vgpr3
168+
; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, undef renamable $vgpr3, 0, 0, 0, 0, implicit $exec
169+
; GCN: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.1, align 4, addrspace 5)
170+
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
171+
; GCN: bb.2:
172+
; GCN: S_ENDPGM 0
173+
bb.0:
174+
liveins: $vgpr0_vgpr1
175+
%0:vreg_64 = COPY $vgpr0_vgpr1
176+
177+
bb.1:
178+
undef %1.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec
179+
GLOBAL_STORE_DWORD %0, undef %1.sub1, 0, 0, 0, 0, implicit $exec
180+
S_CBRANCH_EXECZ %bb.1, implicit $exec
181+
182+
bb.2:
183+
S_ENDPGM 0
184+
185+
...

0 commit comments

Comments
 (0)