Skip to content

Commit 99adc8b

Browse files
committed
Merge pull request #831 from wernsaar/develop
updated sgemm- and strmm-kernel for POWER8
2 parents 3349e9d + 6a9bbfc commit 99adc8b

7 files changed

+8217
-4632
lines changed

kernel/power/sgemm_kernel_16x8_power8.S

Lines changed: 19 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -26,11 +26,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2626
*****************************************************************************/
2727

2828
/**************************************************************************************
29-
* 2016/03/18 Werner Saar (wernsaar@googlemail.com)
29+
* 2016/04/02 Werner Saar (wernsaar@googlemail.com)
3030
* BLASTEST : OK
3131
* CTEST : OK
3232
* TEST : OK
33-
* LAPACK-TEST : OK
33+
* LAPACK-TEST : OK
3434
**************************************************************************************/
3535

3636
/*********************************************************************/
@@ -128,17 +128,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
128128
#endif
129129

130130
#define alpha_r vs30
131+
#define alpha_vr vs31
131132

132133
#define o0 0
133134

134-
#define TBUFFER r14
135+
#define BBUFFER r14
135136
#define o4 r15
136137
#define o12 r16
137138
#define o8 r17
138139
#define L r18
139140
#define T1 r19
140141
#define KK r20
141-
#define BB r21
142+
#define BBO r21
142143
#define I r22
143144
#define J r23
144145
#define AO r24
@@ -256,11 +257,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
256257

257258

258259
cmpwi cr0, M, 0
259-
ble .L999_H1
260+
ble L999_H1
260261
cmpwi cr0, N, 0
261-
ble .L999_H1
262+
ble L999_H1
262263
cmpwi cr0, K, 0
263-
ble .L999_H1
264+
ble L999_H1
264265

265266
li PRE, 256
266267
li o4 , 4
@@ -269,18 +270,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
269270
li o16, 16
270271
li o32, 32
271272
li o48, 48
272-
addi TBUFFER, SP, 320
273+
274+
li T1, 256
275+
slwi T1, T1, 9 // 131072
276+
sub BBUFFER, A, T1 // temp buffer for B unrolled
273277

274278
addi T1, SP, 300
275-
stfs f1, 0(T1)
279+
stxsspx f1, o0 , T1
280+
stxsspx f1, o4 , T1
281+
stxsspx f1, o8 , T1
282+
stxsspx f1, o12 , T1
276283

277-
lxsspx alpha_r, 0, T1
284+
lxsspx alpha_r, o0, T1
285+
lxvw4x alpha_vr, o0, T1
278286

279287

280288

281289
#include "sgemm_logic_16x8_power8.S"
282290

283-
.L999:
291+
L999:
284292
addi r3, 0, 0
285293

286294
lfd f14, 0(SP)

0 commit comments

Comments
 (0)