Skip to content

Commit 1eb43cc

Browse files
authored
Merge pull request #1317 from martin-frbg/power8-asm
Save and restore VSX registers
2 parents 514d237 + 9c017a2 commit 1eb43cc

15 files changed

+884
-89
lines changed

kernel/power/cgemm_kernel_8x4_power8.S

Lines changed: 66 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -82,15 +82,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
8282
#endif
8383

8484
#ifdef __64BIT__
85-
#define STACKSIZE 32000
86-
#define ALPHA_R_SP 296(SP)
87-
#define ALPHA_I_SP 304(SP)
88-
#define FZERO 312(SP)
85+
#define STACKSIZE 32196
86+
#define ALPHA_R_SP 296+196(SP)
87+
#define ALPHA_I_SP 304+196(SP)
88+
#define FZERO 312+196(SP)
8989
#else
90-
#define STACKSIZE 256
91-
#define ALPHA_R_SP 224(SP)
92-
#define ALPHA_I_SP 232(SP)
93-
#define FZERO 240(SP)
90+
#define STACKSIZE 456
91+
#define ALPHA_R_SP 224+200(SP)
92+
#define ALPHA_I_SP 232+200(SP)
93+
#define FZERO 240+200(SP)
9494
#endif
9595

9696
#define M r3
@@ -138,6 +138,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
138138

139139
#define FRAMEPOINTER r12
140140

141+
/* Alias for r11. The v20-v31 save and restore sequences visible in this file
   address the save area through r11 by name rather than through VECSAVE. */
#define VECSAVE r11
142+
141143
#define BBUFFER r14
142144
#define L r15
143145
#define o12 r16
@@ -167,6 +169,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
167169
addi SP, SP, -STACKSIZE
168170
addi SP, SP, -STACKSIZE
169171
addi SP, SP, -STACKSIZE
172+
170173
li r0, 0
171174

172175
stfd f14, 0(SP)
@@ -211,6 +214,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
211214
std r16, 264(SP)
212215
std r15, 272(SP)
213216
std r14, 280(SP)
217+
addi r11, SP, 288
214218
#else
215219
stw r31, 144(SP)
216220
stw r30, 148(SP)
@@ -230,7 +234,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
230234
stw r16, 204(SP)
231235
stw r15, 208(SP)
232236
stw r14, 212(SP)
237+
addi r11, SP, 224
233238
#endif
239+
stvx v20, r11, r0
240+
addi r11, r11, 16
241+
stvx v21, r11, r0
242+
addi r11, r11, 16
243+
stvx v22, r11, r0
244+
addi r11, r11, 16
245+
stvx v23, r11, r0
246+
addi r11, r11, 16
247+
stvx v24, r11, r0
248+
addi r11, r11, 16
249+
stvx v25, r11, r0
250+
addi r11, r11, 16
251+
stvx v26, r11, r0
252+
addi r11, r11, 16
253+
stvx v27, r11, r0
254+
addi r11, r11, 16
255+
stvx v28, r11, r0
256+
addi r11, r11, 16
257+
stvx v29, r11, r0
258+
addi r11, r11, 16
259+
stvx v30, r11, r0
260+
addi r11, r11, 16
261+
stvx v31, r11, r0
262+
li r11, 0
234263

235264
stfs f1, ALPHA_R_SP
236265
stfs f2, ALPHA_I_SP
@@ -301,9 +330,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
301330

302331

303332
#ifdef __64BIT__
304-
addi T1 , SP, 296
333+
addi T1 , SP, 296+196
305334
#else
306-
addi T1 , SP, 224
335+
addi T1 , SP, 224+200
307336
#endif
308337

309338
stxsspx vs1, 0, T1
@@ -375,6 +404,7 @@ L999:
375404
ld r16, 264(SP)
376405
ld r15, 272(SP)
377406
ld r14, 280(SP)
407+
addi r11, SP, 288
378408
#else
379409
lwz r31, 144(SP)
380410
lwz r30, 148(SP)
@@ -394,7 +424,32 @@ L999:
394424
lwz r16, 204(SP)
395425
lwz r15, 208(SP)
396426
lwz r14, 212(SP)
427+
/* r11 = SP + 224: base of the v20-v31 save area, the same offset the 32-bit
   prologue path uses before its stvx sequence. The base register operand
   (SP) is required; without it the lvx loads below have no valid base. */
addi r11, SP, 224
397428
#endif
429+
lvx v20, r11, r0
430+
addi r11, r11, 16
431+
lvx v21, r11, r0
432+
addi r11, r11, 16
433+
lvx v22, r11, r0
434+
addi r11, r11, 16
435+
lvx v23, r11, r0
436+
addi r11, r11, 16
437+
lvx v24, r11, r0
438+
addi r11, r11, 16
439+
lvx v25, r11, r0
440+
addi r11, r11, 16
441+
lvx v26, r11, r0
442+
addi r11, r11, 16
443+
lvx v27, r11, r0
444+
addi r11, r11, 16
445+
lvx v28, r11, r0
446+
addi r11, r11, 16
447+
lvx v29, r11, r0
448+
addi r11, r11, 16
449+
lvx v30, r11, r0
450+
addi r11, r11, 16
451+
lvx v31, r11, r0
452+
li r11, 0
398453

399454
addi SP, SP, STACKSIZE
400455
addi SP, SP, STACKSIZE
@@ -404,4 +459,4 @@ L999:
404459
blr
405460

406461
EPILOGUE
407-
#endif
462+
#endif

kernel/power/cgemm_tcopy_8_power8.S

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
8888

8989
#define J r12
9090

91+
9192
#define PREA r14
9293
#define PREB r15
9394
#define BO r16
@@ -109,7 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
109110

110111
#include "cgemm_tcopy_macros_8_power8.S"
111112

112-
#define STACKSIZE 384
113+
#define STACKSIZE 576
113114

114115

115116
PROLOGUE
@@ -136,6 +137,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
136137
std r16, 264(SP)
137138
std r15, 272(SP)
138139
std r14, 280(SP)
140+
addi r11, SP, 288
141+
stvx v20, r11, r0
142+
addi r11, r11, 16
143+
stvx v21, r11, r0
144+
addi r11, r11, 16
145+
stvx v22, r11, r0
146+
addi r11, r11, 16
147+
stvx v23, r11, r0
148+
addi r11, r11, 16
149+
stvx v24, r11, r0
150+
addi r11, r11, 16
151+
stvx v25, r11, r0
152+
addi r11, r11, 16
153+
stvx v26, r11, r0
154+
addi r11, r11, 16
155+
stvx v27, r11, r0
156+
addi r11, r11, 16
157+
stvx v28, r11, r0
158+
addi r11, r11, 16
159+
stvx v29, r11, r0
160+
addi r11, r11, 16
161+
stvx v30, r11, r0
162+
addi r11, r11, 16
163+
stvx v31, r11, r0
164+
li r11, 0
139165

140166
cmpwi cr0, M, 0
141167
ble- L999
@@ -197,9 +223,33 @@ L999:
197223
ld r16, 264(SP)
198224
ld r15, 272(SP)
199225
ld r14, 280(SP)
226+
addi r11, SP, 288
227+
lvx v20, r11, r3
228+
addi r11, r11, 16
229+
lvx v21, r11, r3
230+
addi r11, r11, 16
231+
lvx v22, r11, r3
232+
addi r11, r11, 16
233+
lvx v23, r11, r3
234+
addi r11, r11, 16
235+
lvx v24, r11, r3
236+
addi r11, r11, 16
237+
lvx v25, r11, r3
238+
addi r11, r11, 16
239+
lvx v26, r11, r3
240+
addi r11, r11, 16
241+
lvx v27, r11, r3
242+
addi r11, r11, 16
243+
lvx v28, r11, r3
244+
addi r11, r11, 16
245+
lvx v29, r11, r3
246+
addi r11, r11, 16
247+
lvx v30, r11, r3
248+
addi r11, r11, 16
249+
lvx v31, r11, r3
250+
li r11, 0
200251

201252
addi SP, SP, STACKSIZE
202-
203253
blr
204254
EPILOGUE
205255

kernel/power/ctrmm_kernel_8x4_power8.S

Lines changed: 63 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -83,13 +83,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
8383

8484
#ifdef __64BIT__
8585
/* The 64-bit STACKSIZE is defined exactly once, below; the former 400-byte
   definition was dropped so the macro is not redefined with a different value. */
86-
#define ALPHA_R_SP 304(SP)
87-
#define ALPHA_I_SP 312(SP)
86+
#define STACKSIZE 592
87+
#define ALPHA_R_SP 304+192(SP)
88+
#define ALPHA_I_SP 312+192(SP)
8889
#else
8990
/* The 32-bit STACKSIZE is defined exactly once, below; the former 256-byte
   definition was dropped so the macro is not redefined with a different value. */
90-
#define ALPHA_R_SP 224(SP)
91-
#define ALPHA_I_SP 232(SP)
92-
#define FZERO 240(SP)
91+
/* Keep this a multiple of 16. The epilogue releases the frame with a single
   "addi SP, SP, STACKSIZE", and stvx/lvx ignore the low four bits of the
   effective address, so a misaligned SP would make the vector save at
   SP+224 land on the GPR save slots just below it.
   464 is the next multiple of 16 above the 440 bytes the layout needs
   (FZERO at 240+196 plus 4 bytes).
   NOTE(review): the matching prologue adjustment is outside this view; confirm. */
#define STACKSIZE 464
92+
#define ALPHA_R_SP 224+196(SP)
93+
#define ALPHA_I_SP 232+196(SP)
94+
#define FZERO 240+196(SP)
9395
#endif
9496

9597
#define M r3
@@ -135,6 +137,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
135137
#define alpha_sr vs30
136138
#define alpha_si vs31
137139

140+
/* Alias for r11. The v20-v31 save and restore sequences visible in this file
   address the save area through r11 by name rather than through VECSAVE. */
#define VECSAVE r11
141+
138142
#define o12 r12
139143
#define KKK r13
140144
#define K1 r14
@@ -208,6 +212,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
208212
std r14, 280(SP)
209213
std r13, 288(SP)
210214
std r12, 296(SP)
215+
addi r11, SP, 304
211216
#else
212217
stw r31, 144(SP)
213218
stw r30, 148(SP)
@@ -228,7 +233,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
228233
stw r15, 208(SP)
229234
stw r14, 212(SP)
230235
stw r13, 216(SP)
236+
addi r11, SP, 224
231237
#endif
238+
stvx v20, r11, r0
239+
addi r11, r11, 16
240+
stvx v21, r11, r0
241+
addi r11, r11, 16
242+
stvx v22, r11, r0
243+
addi r11, r11, 16
244+
stvx v23, r11, r0
245+
addi r11, r11, 16
246+
stvx v24, r11, r0
247+
addi r11, r11, 16
248+
stvx v25, r11, r0
249+
addi r11, r11, 16
250+
stvx v26, r11, r0
251+
addi r11, r11, 16
252+
stvx v27, r11, r0
253+
addi r11, r11, 16
254+
stvx v28, r11, r0
255+
addi r11, r11, 16
256+
stvx v29, r11, r0
257+
addi r11, r11, 16
258+
stvx v30, r11, r0
259+
addi r11, r11, 16
260+
stvx v31, r11, r0
261+
li r11, 0
232262

233263
stfs f1, ALPHA_R_SP
234264
stfs f2, ALPHA_I_SP
@@ -295,9 +325,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
295325

296326

297327
#ifdef __64BIT__
298-
addi T1, SP, 304
328+
addi T1, SP, 304+192
299329
#else
300-
addi T1, SP, 224
330+
addi T1, SP, 224+196
301331
#endif
302332

303333
lxsspx alpha_dr, 0, T1
@@ -369,6 +399,7 @@ L999:
369399
ld r14, 280(SP)
370400
ld r13, 288(SP)
371401
ld r12, 296(SP)
402+
addi r11, SP, 304
372403
#else
373404
lwz r31, 144(SP)
374405
lwz r30, 148(SP)
@@ -389,10 +420,34 @@ L999:
389420
lwz r15, 208(SP)
390421
lwz r14, 212(SP)
391422
lwz r13, 216(SP)
423+
addi r11, SP, 224
392424
#endif
425+
lvx v20, r11, r3
426+
addi r11, r11, 16
427+
lvx v21, r11, r3
428+
addi r11, r11, 16
429+
lvx v22, r11, r3
430+
addi r11, r11, 16
431+
lvx v23, r11, r3
432+
addi r11, r11, 16
433+
lvx v24, r11, r3
434+
addi r11, r11, 16
435+
lvx v25, r11, r3
436+
addi r11, r11, 16
437+
lvx v26, r11, r3
438+
addi r11, r11, 16
439+
lvx v27, r11, r3
440+
addi r11, r11, 16
441+
lvx v28, r11, r3
442+
addi r11, r11, 16
443+
lvx v29, r11, r3
444+
addi r11, r11, 16
445+
lvx v30, r11, r3
446+
addi r11, r11, 16
447+
lvx v31, r11, r3
448+
li r11, 0
393449

394450
addi SP, SP, STACKSIZE
395-
396451
blr
397452

398453
EPILOGUE

0 commit comments

Comments
 (0)