Skip to content

Commit b9ed8eb

Browse files
committed
[ARM][RegisterScavenging] Don't consider LR liveout if it is not reloaded
https://bugs.llvm.org/show_bug.cgi?id=48232 When PrologEpilogInserter writes callee-saved registers to the stack, LR is not reloaded but is instead loaded directly into PC. This was not taken into account when determining if each callee-saved register was liveout for the block. When frame elimination inserts virtual registers, and the register scavenger tries to scavenge LR, it considers it liveout and tries to spill again. However there is no emergency spill slot to use, and it fails with an error: fatal error: error in backend: Error while trying to spill LR from class GPR: Cannot scavenge register without an emergency spill slot! This patch prevents any callee-saved registers which are not reloaded (including LR) from being marked liveout. They are therefore available to scavenge without requiring an extra spill.
1 parent 01b9e61 commit b9ed8eb

File tree

3 files changed

+236
-6
lines changed

3 files changed

+236
-6
lines changed

llvm/lib/CodeGen/LiveRegUnits.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,17 @@ static void addBlockLiveIns(LiveRegUnits &LiveUnits,
8181
static void addCalleeSavedRegs(LiveRegUnits &LiveUnits,
8282
const MachineFunction &MF) {
8383
const MachineRegisterInfo &MRI = MF.getRegInfo();
84-
for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR)
85-
LiveUnits.addReg(*CSR);
84+
const MachineFrameInfo &MFI = MF.getFrameInfo();
85+
for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR) {
86+
const unsigned N = *CSR;
87+
88+
const auto &CSI = MFI.getCalleeSavedInfo();
89+
auto Info =
90+
llvm::find_if(CSI, [N](auto Info) { return Info.getReg() == N; });
91+
// If we have no info for this callee-saved register, assume it is liveout
92+
if (Info == CSI.end() || Info->isRestored())
93+
LiveUnits.addReg(N);
94+
}
8695
}
8796

8897
void LiveRegUnits::addPristines(const MachineFunction &MF) {
Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
# RUN: llc -mtriple=thumbv7-unknown-linux-android30 -run-pass=prologepilog -verify-machineinstrs %s -o - | FileCheck %s
2+
3+
# When saving and restoring callee-saved registers, LR is saved but not restored,
4+
# because it is reloaded directly into PC. Therefore it should be available to scavenge
5+
# without requiring an emergency spill slot.
6+
7+
# Used to result in
8+
# LLVM ERROR: Error while trying to spill LR from class GPR: Cannot scavenge register without an emergency spill slot!
9+
10+
# Check that LR is considered live in
11+
# CHECK: liveins: {{.*}}$lr
12+
13+
# Check that LR is saved to the stack
14+
# CHECK: frame-setup t2STMDB_UPD {{.*}} killed $lr
15+
# CHECK: frame-setup CFI_INSTRUCTION offset $lr,
16+
17+
# Check that LR was successfully scavenged somewhere in the function
18+
# CHECK: $lr = t2ADDri
19+
# CHECK: VSTMQIA $q11, killed $lr
20+
21+
# Check that LR is not restored at the end of the function
22+
# CHECK-NOT: $lr = frame-destroy
23+
# CHECK-NOT: frame-destroy VLDMDIA_UPD {{.*}} def $lr
24+
# CHECK-NOT: frame-destroy t2LDMIA_RET {{.*}} def $lr
25+
# CHECK: frame-destroy t2LDMIA_RET {{.*}} def $pc
26+
27+
--- |
28+
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
29+
30+
%S = type { [32 x i8] }
31+
32+
define void @f(%S* %arg) {
33+
entry:
34+
%ppp..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -8
35+
%ppp..sroa_cast248 = bitcast %S* %ppp..sroa_idx to <8 x float>*
36+
%ppp.copyload = load <8 x float>, <8 x float>* %ppp..sroa_cast248, align 32
37+
38+
%xxx..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -5
39+
%xxx..sroa_cast248 = bitcast %S* %xxx..sroa_idx to <8 x float>*
40+
%xxx.copyload = load <8 x float>, <8 x float>* %xxx..sroa_cast248, align 32
41+
42+
%yyy..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -2
43+
%yyy..sroa_cast244 = bitcast %S* %yyy..sroa_idx to <8 x float>*
44+
%yyy.copyload = load <8 x float>, <8 x float>* %yyy..sroa_cast244, align 32
45+
46+
%zzz..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -7
47+
%zzz..sroa_cast241 = bitcast %S* %zzz..sroa_idx to <8 x float>*
48+
%zzz.copyload = load <8 x float>, <8 x float>* %zzz..sroa_cast241, align 32
49+
50+
%www..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -4
51+
%www..sroa_cast238 = bitcast %S* %www..sroa_idx to <8 x float>*
52+
%www.copyload = load <8 x float>, <8 x float>* %www..sroa_cast238, align 32
53+
54+
%uuu..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 1
55+
%uuu..sroa_cast235 = bitcast %S* %uuu..sroa_idx to <8 x float>*
56+
%uuu.copyload = load <8 x float>, <8 x float>* %uuu..sroa_cast235, align 32
57+
58+
%vvv..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -6
59+
%vvv..sroa_cast230 = bitcast %S* %vvv..sroa_idx to <8 x float>*
60+
%vvv.copyload = load <8 x float>, <8 x float>* %vvv..sroa_cast230, align 32
61+
62+
%ttt..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -3
63+
%ttt..sroa_cast226 = bitcast %S* %ttt..sroa_idx to <8 x float>*
64+
%ttt.copyload = load <8 x float>, <8 x float>* %ttt..sroa_cast226, align 32
65+
66+
%sss..sroa_cast223 = bitcast %S* %arg to <8 x float>*
67+
%sss.copyload = load <8 x float>, <8 x float>* %sss..sroa_cast223, align 32
68+
69+
%mul.i = fmul <8 x float> %ppp.copyload, %www.copyload
70+
%mul.i185 = fmul <8 x float> %xxx.copyload, %uuu.copyload
71+
%mul.i179 = fmul <8 x float> %mul.i185, %vvv.copyload
72+
%mul.i173 = fmul <8 x float> %mul.i179, %ttt.copyload
73+
%mul.i167 = fmul <8 x float> %zzz.copyload, %mul.i173
74+
%add.i = fadd <8 x float> %mul.i, %mul.i167
75+
%div.i = fdiv <8 x float> zeroinitializer, %add.i
76+
%mul.i153 = fmul <8 x float> %uuu.copyload, %div.i
77+
78+
store <8 x float> %mul.i153, <8 x float>* %ppp..sroa_cast248, align 32
79+
80+
%mul.i147 = fmul <8 x float> %uuu.copyload, %vvv.copyload
81+
%mul.i141 = fmul <8 x float> %zzz.copyload, %sss.copyload
82+
%mul.i135 = fmul <8 x float> %mul.i141, %div.i
83+
%sub.i129 = fsub <8 x float> %mul.i147, %mul.i135
84+
85+
store <8 x float> %sub.i129, <8 x float>* %zzz..sroa_cast241, align 32
86+
store <8 x float> %div.i, <8 x float>* %vvv..sroa_cast230, align 32
87+
store <8 x float> %div.i, <8 x float>* %xxx..sroa_cast248, align 32
88+
89+
%mul.i123 = fmul <8 x float> %yyy.copyload, %vvv.copyload
90+
%mul.i117 = fmul <8 x float> %mul.i123, %div.i
91+
%sub.i111 = fsub <8 x float> %sss.copyload, %mul.i117
92+
store <8 x float> %sub.i111, <8 x float>* %www..sroa_cast238, align 32
93+
94+
%mul.i105 = fmul <8 x float> %ppp.copyload, %ttt.copyload
95+
%mul.i99 = fmul <8 x float> %mul.i105, %div.i
96+
%sub.i93 = fsub <8 x float> %xxx.copyload, %mul.i99
97+
store <8 x float> %sub.i93, <8 x float>* %ttt..sroa_cast226, align 32
98+
99+
%mul.i81 = fmul <8 x float> %yyy.copyload, %www.copyload
100+
%mul.i75 = fmul <8 x float> %mul.i81, %div.i
101+
%sub.i = fsub <8 x float> %mul.i185, %mul.i75
102+
store <8 x float> %sub.i, <8 x float>* %yyy..sroa_cast244, align 32
103+
104+
ret void
105+
}
106+
...
107+
---
108+
name: f
109+
alignment: 2
110+
tracksRegLiveness: true
111+
liveins:
112+
- { reg: '$r0' }
113+
frameInfo:
114+
maxAlignment: 16
115+
maxCallFrameSize: 0
116+
stack:
117+
- { id: 0, type: spill-slot, size: 16, alignment: 16 }
118+
- { id: 1, type: spill-slot, size: 16, alignment: 16 }
119+
- { id: 2, type: spill-slot, size: 16, alignment: 16 }
120+
- { id: 3, type: spill-slot, size: 16, alignment: 16 }
121+
constants:
122+
- id: 0
123+
value: 'float 0.000000e+00'
124+
alignment: 4
125+
machineFunctionInfo: {}
126+
body: |
127+
bb.0.entry:
128+
liveins: $r0
129+
$r2 = t2SUBri $r0, 128, 14 /* CC::al */, $noreg, $noreg
130+
$q8 = VLD1q64 $r2, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.www..sroa_cast238, align 32)
131+
VSTMQIA $q8, %stack.0, 14 /* CC::al */, $noreg :: (store 16 into %stack.0)
132+
$r12 = t2SUBri $r0, 256, 14 /* CC::al */, $noreg, $noreg
133+
$q12 = VLD1q64 $r12, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.ppp..sroa_cast248, align 32)
134+
$q1 = VMULfq $q12, killed $q8, 14 /* CC::al */, $noreg
135+
$r3 = nuw t2ADDri $r0, 32, 14 /* CC::al */, $noreg, $noreg
136+
$q10 = VLD1q64 killed $r3, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.uuu..sroa_cast235, align 32)
137+
$r5 = t2SUBri $r0, 160, 14 /* CC::al */, $noreg, $noreg
138+
$q15 = VLD1q64 $r5, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.xxx..sroa_cast248, align 32)
139+
$q14 = VMULfq $q15, $q10, 14 /* CC::al */, $noreg
140+
$r6 = t2SUBri $r0, 192, 14 /* CC::al */, $noreg, $noreg
141+
$q13 = VLD1q64 $r6, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.vvv..sroa_cast230, align 32)
142+
$q8 = VMULfq $q14, $q13, 14 /* CC::al */, $noreg
143+
$r4 = t2SUBri $r0, 96, 14 /* CC::al */, $noreg, $noreg
144+
$q6 = VLD1q64 $r4, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.ttt..sroa_cast226, align 32)
145+
$q8 = VMULfq killed $q8, $q6, 14 /* CC::al */, $noreg
146+
$r3 = t2SUBri $r0, 224, 14 /* CC::al */, $noreg, $noreg
147+
$q5 = VLD1q64 $r3, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.zzz..sroa_cast241, align 32)
148+
$q1 = VMLAfq killed $q1, $q5, killed $q8, 14 /* CC::al */, $noreg
149+
$s8 = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool)
150+
$s3 = VDIVS $s8, $s7, 14 /* CC::al */, $noreg, implicit-def $q0
151+
$s2 = VDIVS $s8, $s6, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0
152+
$s1 = VDIVS $s8, $s5, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0
153+
$s0 = VDIVS $s8, $s4, 14 /* CC::al */, $noreg, implicit killed $q1, implicit killed $q0, implicit-def $q0
154+
$r7 = t2SUBri $r0, 64, 14 /* CC::al */, $noreg, $noreg
155+
$q8 = VLD1q64 $r7, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.yyy..sroa_cast244, align 32)
156+
VSTMQIA $q8, %stack.1, 14 /* CC::al */, $noreg :: (store 16 into %stack.1)
157+
$q8 = VMULfq killed $q8, $q13, 14 /* CC::al */, $noreg
158+
$r1 = t2ADDri $r0, 48, 14 /* CC::al */, $noreg, $noreg
159+
$q9, $r0 = VLD1q32wb_fixed killed $r0, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.sss..sroa_cast223, align 32)
160+
$q11 = COPY $q9
161+
$q11 = VMLSfq killed $q11, killed $q8, $q0, 14 /* CC::al */, $noreg
162+
$r2 = VST1q32wb_fixed killed $r2, 16, killed $q11, 14 /* CC::al */, $noreg :: (store 16 into %ir.www..sroa_cast238, align 32)
163+
$q8 = VLD1q64 $r2, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.www..sroa_cast238 + 16, basealign 32)
164+
VSTMQIA $q8, %stack.3, 14 /* CC::al */, $noreg :: (store 16 into %stack.3)
165+
$q11 = VMULfq $q10, $q0, 14 /* CC::al */, $noreg
166+
$r12 = VST1q32wb_fixed killed $r12, 16, killed $q11, 14 /* CC::al */, $noreg :: (store 16 into %ir.ppp..sroa_cast248, align 32)
167+
$q11 = VLD1q64 $r12, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.ppp..sroa_cast248 + 16, basealign 32)
168+
VSTMQIA $q11, %stack.2, 14 /* CC::al */, $noreg :: (store 16 into %stack.2)
169+
$q1 = VMULfq killed $q11, killed $q8, 14 /* CC::al */, $noreg
170+
$r5 = VST1q32wb_fixed killed $r5, 16, $q0, 14 /* CC::al */, $noreg :: (store 16 into %ir.xxx..sroa_cast248, align 32)
171+
$q4 = VLD1q64 $r5, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.xxx..sroa_cast248 + 16, basealign 32)
172+
$q11 = VLD1q64 killed $r1, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.uuu..sroa_cast235 + 16, basealign 32)
173+
$q7 = VMULfq $q4, $q11, 14 /* CC::al */, $noreg
174+
$r6 = VST1q32wb_fixed killed $r6, 16, $q0, 14 /* CC::al */, $noreg :: (store 16 into %ir.vvv..sroa_cast230, align 32)
175+
$q3 = VLD1q64 $r6, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.vvv..sroa_cast230 + 16, basealign 32)
176+
$q8 = VMULfq $q7, $q3, 14 /* CC::al */, $noreg
177+
$q12 = VMULfq killed $q12, killed $q6, 14 /* CC::al */, $noreg
178+
$q15 = VMLSfq killed $q15, killed $q12, $q0, 14 /* CC::al */, $noreg
179+
$r4 = VST1q32wb_fixed killed $r4, 16, killed $q15, 14 /* CC::al */, $noreg :: (store 16 into %ir.ttt..sroa_cast226, align 32)
180+
$q12 = VLD1q64 $r4, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.ttt..sroa_cast226 + 16, basealign 32)
181+
$q8 = VMULfq killed $q8, $q12, 14 /* CC::al */, $noreg
182+
$q9 = VMULfq killed $q5, killed $q9, 14 /* CC::al */, $noreg
183+
$q10 = VMULfq killed $q10, killed $q13, 14 /* CC::al */, $noreg
184+
$q10 = VMLSfq killed $q10, killed $q9, $q0, 14 /* CC::al */, $noreg
185+
$r3 = VST1q32wb_fixed killed $r3, 16, killed $q10, 14 /* CC::al */, $noreg :: (store 16 into %ir.zzz..sroa_cast241, align 32)
186+
$q10 = VLD1q64 $r3, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.zzz..sroa_cast241 + 16, basealign 32)
187+
$q1 = VMLAfq killed $q1, $q10, killed $q8, 14 /* CC::al */, $noreg
188+
$s23 = VDIVS $s8, $s7, 14 /* CC::al */, $noreg, implicit-def $q5
189+
$s22 = VDIVS $s8, $s6, 14 /* CC::al */, $noreg, implicit killed $q5, implicit-def $q5
190+
$s21 = VDIVS $s8, $s5, 14 /* CC::al */, $noreg, implicit killed $q5, implicit-def $q5
191+
$s20 = VDIVS killed $s8, $s4, 14 /* CC::al */, $noreg, implicit killed $q1, implicit killed $q5, implicit-def $q5
192+
VST1q64 killed $r5, 16, $q5, 14 /* CC::al */, $noreg :: (store 16 into %ir.xxx..sroa_cast248 + 16, basealign 32)
193+
VST1q64 killed $r6, 16, $q5, 14 /* CC::al */, $noreg :: (store 16 into %ir.vvv..sroa_cast230 + 16, basealign 32)
194+
$q8 = VLDMQIA %stack.0, 14 /* CC::al */, $noreg :: (load 16 from %stack.0)
195+
$q9 = VLDMQIA %stack.1, 14 /* CC::al */, $noreg :: (load 16 from %stack.1)
196+
$q8 = VMULfq killed $q9, killed $q8, 14 /* CC::al */, $noreg
197+
$q14 = VMLSfq killed $q14, killed $q8, killed $q0, 14 /* CC::al */, $noreg
198+
$r7 = VST1q32wb_fixed killed $r7, 16, killed $q14, 14 /* CC::al */, $noreg :: (store 16 into %ir.yyy..sroa_cast244, align 32)
199+
$q8 = VLD1q64 $r7, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.yyy..sroa_cast244 + 16, basealign 32)
200+
$q9 = VLDMQIA %stack.3, 14 /* CC::al */, $noreg :: (load 16 from %stack.3)
201+
$q9 = VMULfq $q8, killed $q9, 14 /* CC::al */, $noreg
202+
$q7 = VMLSfq killed $q7, killed $q9, $q5, 14 /* CC::al */, $noreg
203+
VST1q64 killed $r7, 16, killed $q7, 14 /* CC::al */, $noreg :: (store 16 into %ir.yyy..sroa_cast244 + 16, basealign 32)
204+
$q9 = VLDMQIA %stack.2, 14 /* CC::al */, $noreg :: (load 16 from %stack.2)
205+
$q9 = VMULfq killed $q9, killed $q12, 14 /* CC::al */, $noreg
206+
$q4 = VMLSfq killed $q4, killed $q9, $q5, 14 /* CC::al */, $noreg
207+
VST1q64 killed $r4, 16, killed $q4, 14 /* CC::al */, $noreg :: (store 16 into %ir.ttt..sroa_cast226 + 16, basealign 32)
208+
$q8 = VMULfq killed $q8, $q3, 14 /* CC::al */, $noreg
209+
$q9 = VLD1q64 killed $r0, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.sss..sroa_cast223 + 16, basealign 32)
210+
$q12 = COPY $q9
211+
$q12 = VMLSfq killed $q12, killed $q8, $q5, 14 /* CC::al */, $noreg
212+
VST1q64 killed $r2, 16, killed $q12, 14 /* CC::al */, $noreg :: (store 16 into %ir.www..sroa_cast238 + 16, basealign 32)
213+
$q8 = VMULfq $q11, killed $q3, 14 /* CC::al */, $noreg
214+
$q9 = VMULfq killed $q10, killed $q9, 14 /* CC::al */, $noreg
215+
$q8 = VMLSfq killed $q8, killed $q9, $q5, 14 /* CC::al */, $noreg
216+
VST1q64 killed $r3, 16, killed $q8, 14 /* CC::al */, $noreg :: (store 16 into %ir.zzz..sroa_cast241 + 16, basealign 32)
217+
$q8 = VMULfq killed $q11, killed $q5, 14 /* CC::al */, $noreg
218+
VST1q64 killed $r12, 16, killed $q8, 14 /* CC::al */, $noreg :: (store 16 into %ir.ppp..sroa_cast248 + 16, basealign 32)
219+
tBX_RET 14 /* CC::al */, $noreg
220+
221+
...

llvm/test/CodeGen/Thumb2/mve-multivec-spill.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,18 +35,18 @@ define arm_aapcs_vfpcc void @spill_multivector(<4 x i32>* %p) {
3535
; CHECK-NEXT: vld21.32 {q4, q5}, [r0]
3636
; CHECK-NEXT: bl external_function
3737
; CHECK-NEXT: vldmia sp, {d2, d3, d4, d5} @ 32-byte Reload
38-
; CHECK-NEXT: add r0, sp, #32
38+
; CHECK-NEXT: add.w lr, sp, #32
3939
; CHECK-NEXT: vstrw.32 q2, [r4, #80]
4040
; CHECK-NEXT: vstrw.32 q5, [r4, #144]
4141
; CHECK-NEXT: vstrw.32 q4, [r4, #128]
4242
; CHECK-NEXT: vstrw.32 q7, [r4, #112]
4343
; CHECK-NEXT: vstrw.32 q1, [r4, #64]
44-
; CHECK-NEXT: vldmia r0, {d2, d3, d4, d5} @ 32-byte Reload
45-
; CHECK-NEXT: add r0, sp, #64
44+
; CHECK-NEXT: vldmia lr, {d2, d3, d4, d5} @ 32-byte Reload
45+
; CHECK-NEXT: add.w lr, sp, #64
4646
; CHECK-NEXT: vstrw.32 q2, [r4, #48]
4747
; CHECK-NEXT: vstrw.32 q6, [r4, #96]
4848
; CHECK-NEXT: vstrw.32 q1, [r5]
49-
; CHECK-NEXT: vldmia r0, {d2, d3, d4, d5} @ 32-byte Reload
49+
; CHECK-NEXT: vldmia lr, {d2, d3, d4, d5} @ 32-byte Reload
5050
; CHECK-NEXT: vstrw.32 q2, [r4, #16]
5151
; CHECK-NEXT: vstrw.32 q1, [r4]
5252
; CHECK-NEXT: add sp, #112

0 commit comments

Comments
 (0)