Skip to content

Commit e1cdd15

Browse files
committed
Merge pull request #832 from wernsaar/develop
updated cgemm- and ctrmm-kernel for POWER8
2 parents: 99adc8b + d4c0330; commit: e1cdd15

File tree

6 files changed: +2727 additions, -2583 deletions

benchmark/Makefile

Lines changed: 30 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,9 @@ LIBMKL = -L$(MKL) -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread
3333
# Apple vecLib
3434
LIBVECLIB = -framework Accelerate
3535

36+
ESSL=/opt/ibm/lib
37+
LIBESSL = -lessl $(ESSL)/libxlomp_ser.so.1 $(ESSL)/libxlf90_r.so.1 $(ESSL)/libxlfmath.so.1 $(ESSL)/libxlsmp.so.1 /opt/ibm/xlC/13.1.2/lib/libxl.a
38+
3639
ifeq ($(OSNAME), WINNT)
3740

3841
goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
@@ -255,7 +258,8 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
255258

256259
endif
257260

258-
261+
essl :: sgemm.essl strmm.essl dgemm.essl dtrmm.essl \
262+
cgemm.essl ctrmm.essl zgemm.essl ztrmm.essl
259263

260264
veclib :: slinpack.veclib dlinpack.veclib clinpack.veclib zlinpack.veclib \
261265
scholesky.veclib dcholesky.veclib ccholesky.veclib zcholesky.veclib \
@@ -443,6 +447,9 @@ sgemm.mkl : sgemm.$(SUFFIX)
443447
sgemm.veclib : sgemm.$(SUFFIX)
444448
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
445449

450+
sgemm.essl : sgemm.$(SUFFIX)
451+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
452+
446453
##################################### Dgemm ####################################################
447454
dgemm.goto : dgemm.$(SUFFIX) ../$(LIBNAME)
448455
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
@@ -459,6 +466,9 @@ dgemm.mkl : dgemm.$(SUFFIX)
459466
dgemm.veclib : dgemm.$(SUFFIX)
460467
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
461468

469+
dgemm.essl : dgemm.$(SUFFIX)
470+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
471+
462472
##################################### Cgemm ####################################################
463473

464474
cgemm.goto : cgemm.$(SUFFIX) ../$(LIBNAME)
@@ -476,6 +486,9 @@ cgemm.mkl : cgemm.$(SUFFIX)
476486
cgemm.veclib : cgemm.$(SUFFIX)
477487
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
478488

489+
cgemm.essl : cgemm.$(SUFFIX)
490+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
491+
479492
##################################### Zgemm ####################################################
480493

481494
zgemm.goto : zgemm.$(SUFFIX) ../$(LIBNAME)
@@ -493,6 +506,9 @@ zgemm.mkl : zgemm.$(SUFFIX)
493506
zgemm.veclib : zgemm.$(SUFFIX)
494507
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
495508

509+
zgemm.essl : zgemm.$(SUFFIX)
510+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
511+
496512
##################################### Ssymm ####################################################
497513
ssymm.goto : ssymm.$(SUFFIX) ../$(LIBNAME)
498514
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
@@ -575,6 +591,9 @@ strmm.mkl : strmm.$(SUFFIX)
575591
strmm.veclib : strmm.$(SUFFIX)
576592
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
577593

594+
strmm.essl : strmm.$(SUFFIX)
595+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
596+
578597
##################################### Dtrmm ####################################################
579598
dtrmm.goto : dtrmm.$(SUFFIX) ../$(LIBNAME)
580599
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
@@ -591,6 +610,9 @@ dtrmm.mkl : dtrmm.$(SUFFIX)
591610
dtrmm.veclib : dtrmm.$(SUFFIX)
592611
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
593612

613+
dtrmm.essl : dtrmm.$(SUFFIX)
614+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
615+
594616
##################################### Ctrmm ####################################################
595617

596618
ctrmm.goto : ctrmm.$(SUFFIX) ../$(LIBNAME)
@@ -608,6 +630,9 @@ ctrmm.mkl : ctrmm.$(SUFFIX)
608630
ctrmm.veclib : ctrmm.$(SUFFIX)
609631
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
610632

633+
ctrmm.essl : ctrmm.$(SUFFIX)
634+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
635+
611636
##################################### Ztrmm ####################################################
612637

613638
ztrmm.goto : ztrmm.$(SUFFIX) ../$(LIBNAME)
@@ -625,6 +650,9 @@ ztrmm.mkl : ztrmm.$(SUFFIX)
625650
ztrmm.veclib : ztrmm.$(SUFFIX)
626651
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
627652

653+
ztrmm.essl : ztrmm.$(SUFFIX)
654+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
655+
628656
##################################### Strsm ####################################################
629657
strsm.goto : strsm.$(SUFFIX) ../$(LIBNAME)
630658
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
@@ -2179,7 +2207,7 @@ smallscaling: smallscaling.c ../$(LIBNAME)
21792207
$(CC) $(CFLAGS) -o $(@F) $^ $(EXTRALIB) -fopenmp -lm
21802208

21812209
clean ::
2182-
@rm -f *.goto *.mkl *.acml *.atlas *.veclib
2210+
@rm -f *.goto *.mkl *.acml *.atlas *.veclib *.essl
21832211

21842212
include $(TOPDIR)/Makefile.tail
21852213

kernel/power/cgemm_kernel_8x4_power8.S

Lines changed: 33 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -26,11 +26,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2626
*****************************************************************************/
2727

2828
/**************************************************************************************
29-
* 2016/03/18 Werner Saar ([email protected])
29+
* 2016/04/03 Werner Saar ([email protected])
3030
* BLASTEST : OK
3131
* CTEST : OK
3232
* TEST : OK
33-
* LAPACK-TEST : OK
33+
* LAPACK-TEST : OK
3434
**************************************************************************************/
3535

3636
/*********************************************************************/
@@ -130,10 +130,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
130130
#endif
131131

132132
#define o0 0
133-
#define alpha_r vs30
134-
#define alpha_i vs31
135133

136-
#define TBUFFER r14
134+
#define alpha_dr vs28
135+
#define alpha_di vs29
136+
#define alpha_sr vs30
137+
#define alpha_si vs31
138+
139+
140+
#define NOTUSED r14
137141
#define L r15
138142
#define o12 r16
139143
#define o4 r17
@@ -271,21 +275,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
271275
#include "cgemm_macros_8x4_power8.S"
272276

273277
cmpwi cr0, M, 0
274-
ble .L999_H1
278+
ble L999_H1
275279
cmpwi cr0, N, 0
276-
ble .L999_H1
280+
ble L999_H1
277281
cmpwi cr0, K, 0
278-
ble .L999_H1
282+
ble L999_H1
279283

280284
slwi LDC, LDC, ZBASE_SHIFT
281-
li PRE, 256
285+
li PRE, 384
282286
li o4 , 4
283287
li o8 , 8
284288
li o12 , 12
285289
li o16 , 16
286290
li o32 , 32
287291
li o48 , 48
288-
addi TBUFFER, SP, 360
289292

290293

291294
#ifdef __64BIT__
@@ -294,14 +297,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
294297
addi T1 , SP, 224
295298
#endif
296299

297-
lxsspx alpha_r, 0, T1
298-
lxsspx alpha_i, o8, T1
300+
stxsspx vs1, 0, T1
301+
lxsspx alpha_dr, 0, T1
302+
stxsspx vs2, o8 , T1
303+
lxsspx alpha_di, o8, T1
304+
addi T1, SP, 360
305+
li T2, 0
306+
307+
stw T2, 0(T1)
308+
stw T2, 4(T1)
309+
stw T2, 8(T1)
310+
stxsspx alpha_dr, o12, T1
311+
lxvw4x alpha_sr, o0 , T1
312+
addi T1, T1, 16
313+
314+
stw T2, 0(T1)
315+
stw T2, 4(T1)
316+
stw T2, 8(T1)
317+
stxsspx alpha_di, o12, T1
318+
lxvw4x alpha_si, o0 , T1
299319

300320
.align 5
301321

302322
#include "cgemm_logic_8x4_power8.S"
303323

304-
.L999:
324+
L999:
305325
addi r3, 0, 0
306326

307327
lfd f14, 0(SP)

0 commit comments

Comments
 (0)