@@ -26,11 +26,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
26
*****************************************************************************/
27
27
28
28
/**************************************************************************************
29
- * 2016/03/18 Werner Saar ([email protected] )
29
+ * 2016/04/03 Werner Saar ([email protected] )
30
30
* BLASTEST : OK
31
31
* CTEST : OK
32
32
* TEST : OK
33
- * LAPACK-TEST : OK
33
+ * LAPACK-TEST : OK
34
34
**************************************************************************************/
35
35
36
36
/*********************************************************************/
@@ -130,10 +130,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
130
130
#endif
131
131
132
132
#define o0 0
133
- #define alpha_r vs30
134
- #define alpha_i vs31
135
133
136
- #define TBUFFER r14
134
+ #define alpha_dr vs28
135
+ #define alpha_di vs29
136
+ #define alpha_sr vs30
137
+ #define alpha_si vs31
138
+
139
+
140
+ #define NOTUSED r14
137
141
#define L r15
138
142
#define o12 r16
139
143
#define o4 r17
@@ -271,21 +275,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
271
275
#include "cgemm_macros_8x4_power8.S"
272
276
273
277
cmpwi cr0, M, 0
274
- ble . L999_H1
278
+ ble L999_H1
275
279
cmpwi cr0, N, 0
276
- ble . L999_H1
280
+ ble L999_H1
277
281
cmpwi cr0, K, 0
278
- ble . L999_H1
282
+ ble L999_H1
279
283
280
284
slwi LDC, LDC, ZBASE_SHIFT
281
- li PRE, 256
285
+ li PRE, 384
282
286
li o4 , 4
283
287
li o8 , 8
284
288
li o12 , 12
285
289
li o16 , 16
286
290
li o32 , 32
287
291
li o48 , 48
288
- addi TBUFFER, SP, 360
289
292
290
293
291
294
#ifdef __64BIT__
@@ -294,14 +297,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
294
297
addi T1 , SP, 224
295
298
#endif
296
299
297
- lxsspx alpha_r, 0 , T1
298
- lxsspx alpha_i, o8, T1
300
+ stxsspx vs1, 0 , T1
301
+ lxsspx alpha_dr, 0 , T1
302
+ stxsspx vs2, o8 , T1
303
+ lxsspx alpha_di, o8, T1
304
+ addi T1, SP, 360
305
+ li T2, 0
306
+
307
+ stw T2, 0 (T1)
308
+ stw T2, 4 (T1)
309
+ stw T2, 8 (T1)
310
+ stxsspx alpha_dr, o12, T1
311
+ lxvw4x alpha_sr, o0 , T1
312
+ addi T1, T1, 16
313
+
314
+ stw T2, 0 (T1)
315
+ stw T2, 4 (T1)
316
+ stw T2, 8 (T1)
317
+ stxsspx alpha_di, o12, T1
318
+ lxvw4x alpha_si, o0 , T1
299
319
300
320
.align 5
301
321
302
322
#include "cgemm_logic_8x4_power8.S"
303
323
304
- . L999:
324
+ L999:
305
325
addi r3, 0 , 0
306
326
307
327
lfd f14, 0 (SP)
0 commit comments