1
- /***************************************************************************
2
- Copyright (c) 2013-2016, The OpenBLAS Project
3
- All rights reserved.
4
- Redistribution and use in source and binary forms, with or without
5
- modification, are permitted provided that the following conditions are
6
- met:
7
- 1. Redistributions of source code must retain the above copyright
8
- notice, this list of conditions and the following disclaimer.
9
- 2. Redistributions in binary form must reproduce the above copyright
10
- notice, this list of conditions and the following disclaimer in
11
- the documentation and/or other materials provided with the
12
- distribution.
13
- 3. Neither the name of the OpenBLAS project nor the names of
14
- its contributors may be used to endorse or promote products
15
- derived from this software without specific prior written permission.
16
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
- ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20
- LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22
- SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24
- OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25
- USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
- *****************************************************************************/
27
-
28
- /**************************************************************************************
29
- * 2016/03/05 Werner Saar ([email protected] )
30
- * BLASTEST : OK
31
- * CTEST : OK
32
- * TEST : OK
33
- * LAPACK-TEST : OK
34
- **************************************************************************************/
35
-
36
1
/*********************************************************************/
37
2
/* Copyright 2009, 2010 The University of Texas at Austin. */
38
3
/* All rights reserved. */
@@ -82,7 +47,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
82
47
#endif
83
48
84
49
#ifdef __64BIT__
85
- #define STACKSIZE 320
50
+ #define STACKSIZE 32000
86
51
#define ALPHA_R_SP 296 (SP)
87
52
#define ALPHA_I_SP 304 (SP)
88
53
#define FZERO 312 (SP)
@@ -133,11 +98,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
133
98
#define alpha_r vs30
134
99
#define alpha_i vs31
135
100
101
+
102
+ #define FRAMEPOINTER r12
103
+
104
+ #define BBUFFER r14
105
+
136
106
#define L r15
137
107
#define ALPHA r16
138
108
#define o24 r17
139
109
#define T2 r19
140
- #define KK r20
110
+ #define BBO r20
141
111
#define o8 r21
142
112
#define I r22
143
113
#define J r23
@@ -156,8 +126,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
156
126
PROLOGUE
157
127
PROFCODE
158
128
159
- addi SP, SP, -STACKSIZE
160
- li r0, 0
129
+ mr FRAMEPOINTER, SP
130
+ addi SP, SP, -STACKSIZE
131
+ addi SP, SP, -STACKSIZE
132
+ addi SP, SP, -STACKSIZE
133
+ addi SP, SP, -STACKSIZE
134
+ li r0, 0
161
135
162
136
stfd f14, 0 (SP)
163
137
stfd f15, 8 (SP)
@@ -200,6 +174,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
200
174
std r17, 256 (SP)
201
175
std r16, 264 (SP)
202
176
std r15, 272 (SP)
177
+ std r14, 280 (SP)
203
178
#else
204
179
stw r31, 144 (SP)
205
180
stw r30, 148 (SP)
@@ -226,37 +201,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
226
201
227
202
#ifdef linux
228
203
#ifdef __64BIT__
229
- ld LDC, FRAMESLOT(0 ) + STACKSIZE(SP )
204
+ ld LDC, FRAMESLOT(0 ) + 0 (FRAMEPOINTER )
230
205
#endif
231
206
#endif
232
207
233
208
#if defined(_AIX) || defined(__APPLE__)
234
209
#ifdef __64BIT__
235
- ld LDC, FRAMESLOT(0 ) + STACKSIZE(SP )
210
+ ld LDC, FRAMESLOT(0 ) + 0 (FRAMEPOINTER )
236
211
#else
237
212
#ifdef DOUBLE
238
- lwz B, FRAMESLOT(0 ) + STACKSIZE(SP )
239
- lwz C, FRAMESLOT(1 ) + STACKSIZE(SP )
240
- lwz LDC, FRAMESLOT(2 ) + STACKSIZE(SP )
213
+ lwz B, FRAMESLOT(0 ) + 0 (FRAMEPOINTER )
214
+ lwz C, FRAMESLOT(1 ) + 0 (FRAMEPOINTER )
215
+ lwz LDC, FRAMESLOT(2 ) + 0 (FRAMEPOINTER )
241
216
#else
242
- lwz LDC, FRAMESLOT(0 ) + STACKSIZE(SP )
217
+ lwz LDC, FRAMESLOT(0 ) + 0 (FRAMEPOINTER )
243
218
#endif
244
219
#endif
245
220
#endif
246
221
247
222
#ifdef TRMMKERNEL
248
223
#if defined(linux) && defined(__64BIT__)
249
- ld OFFSET, FRAMESLOT(1 ) + STACKSIZE(SP )
224
+ ld OFFSET, FRAMESLOT(1 ) + 0 (FRAMEPOINTER )
250
225
#endif
251
226
252
227
#if defined(_AIX) || defined(__APPLE__)
253
228
#ifdef __64BIT__
254
- ld OFFSET, FRAMESLOT(1 ) + STACKSIZE(SP )
229
+ ld OFFSET, FRAMESLOT(1 ) + 0 (FRAMEPOINTER )
255
230
#else
256
231
#ifdef DOUBLE
257
- lwz OFFSET, FRAMESLOT(3 ) + STACKSIZE(SP )
232
+ lwz OFFSET, FRAMESLOT(3 ) + 0 (FRAMEPOINTER )
258
233
#else
259
- lwz OFFSET, FRAMESLOT(1 ) + STACKSIZE(SP )
234
+ lwz OFFSET, FRAMESLOT(1 ) + 0 (FRAMEPOINTER )
260
235
#endif
261
236
#endif
262
237
#endif
@@ -268,34 +243,38 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
268
243
#include "zgemm_macros_8x2_power8.S"
269
244
270
245
cmpwi cr0, M, 0
271
- ble . L999
246
+ ble L999
272
247
cmpwi cr0, N, 0
273
- ble . L999
248
+ ble L999
274
249
cmpwi cr0, K, 0
275
- ble . L999
250
+ ble L999
276
251
277
252
slwi LDC, LDC, ZBASE_SHIFT
278
- li PRE, 256
253
+ li PRE, 384
279
254
li o8 , 8
280
255
li o16 , 16
281
256
li o24 , 24
282
257
li o32 , 32
283
258
li o48 , 48
284
259
260
+ addi BBUFFER, SP, 512 +4096
261
+ li T1, -4096
262
+ and BBUFFER, BBUFFER, T1
263
+
285
264
#ifdef __64BIT__
286
265
addi ALPHA, SP, 296
287
266
#else
288
267
addi ALPHA, SP, 224
289
268
#endif
290
269
291
- lxvdsx alpha_r, 0 , ALPHA
292
- lxvdsx alpha_i, o8, ALPHA
270
+ lxsdx alpha_r, 0 , ALPHA
271
+ lxsdx alpha_i, o8, ALPHA
293
272
294
- .align 5
273
+ .align 4
295
274
296
275
#include "zgemm_logic_8x2_power8.S"
297
276
298
- . L999:
277
+ L999:
299
278
addi r3, 0 , 0
300
279
301
280
lfd f14, 0 (SP)
@@ -339,6 +318,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
339
318
ld r17, 256 (SP)
340
319
ld r16, 264 (SP)
341
320
ld r15, 272 (SP)
321
+ ld r14, 280 (SP)
342
322
#else
343
323
lwz r31, 144 (SP)
344
324
lwz r30, 148 (SP)
@@ -360,6 +340,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
360
340
#endif
361
341
362
342
addi SP, SP, STACKSIZE
343
+ addi SP, SP, STACKSIZE
344
+ addi SP, SP, STACKSIZE
345
+ addi SP, SP, STACKSIZE
363
346
364
347
blr
365
348
0 commit comments