Skip to content

Commit 0d96b0e

Browse files
Merge branch 'z13' into develop
2 parents dc34a0d + 848cb27 commit 0d96b0e

File tree

6 files changed

+2096
-73
lines changed

6 files changed

+2096
-73
lines changed

CONTRIBUTORS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,5 +164,6 @@ In chronological order:
164164

165165
* Abdelrauf <https://github.com/quickwritereader>
166166
* [2017-01-01] dgemm and dtrmm kernels for IBM z13
167+
* [2017-02-26] ztrmm kernel for IBM z13
167168

168169

kernel/zarch/KERNEL.Z13

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ ZGEMVTKERNEL = ../arm/zgemv_t.c
8383
STRMMKERNEL = ../generic/trmmkernel_2x2.c
8484
DTRMMKERNEL = trmm8x4V.S
8585
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
86-
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
86+
ZTRMMKERNEL = ztrmm4x4V.S
8787

8888
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
8989
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
@@ -109,11 +109,11 @@ CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
109109
CGEMMONCOPYOBJ = cgemm_oncopy.o
110110
CGEMMOTCOPYOBJ = cgemm_otcopy.o
111111

112-
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
113-
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
114-
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
115-
ZGEMMONCOPYOBJ = zgemm_oncopy.o
116-
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
112+
ZGEMMKERNEL = ztrmm4x4V.S
113+
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
114+
ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
115+
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
116+
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
117117

118118
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
119119
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c

kernel/zarch/kernelMacros.S

Lines changed: 9 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1455,71 +1455,19 @@
14551455
lay \TEMP_VAL,-\C_B(\TEMP_VAL)
14561456
#endif
14571457
/*ptrba += temp*C_A;
1458-
ptrbb += temp*C_B;*/
1459-
.if \C_B==4
1458+
ptrbb += temp*C_B;*/
14601459
.if \C_A==8
1461-
sllg \TEMP_VAL, \TEMP_VAL,5 /*temp*4*/
1462-
la \PTR_B,0(\PTR_B,\TEMP_VAL) /*ptrbb+temp*C_B*/
1463-
agr \PTR_A, \TEMP_VAL /*ptrba+temp*C_A*/
1464-
la \PTR_A,0(\PTR_A,\TEMP_VAL) /*ptrba+temp*C_A*/
1460+
sllg \TEMP_VAL, \TEMP_VAL,6
14651461
.elseif \C_A==4
1466-
sllg \TEMP_VAL, \TEMP_VAL,5 /*temp*4*/
1467-
agr \PTR_B, \TEMP_VAL /*ptrbb+temp*C_B*/
1468-
la \PTR_A,0(\PTR_A,\TEMP_VAL) /*ptrba+temp*C_A*/
1462+
sllg \TEMP_VAL, \TEMP_VAL,5 /*temp*4*/
14691463
.elseif \C_A==2
1470-
sllg \TEMP_VAL, \TEMP_VAL,4 /*temp*2*/
1471-
agr \PTR_B, \TEMP_VAL /*ptrbb+temp*C_B*/
1472-
la \PTR_A,0(\PTR_A,\TEMP_VAL) /*ptrba+temp*C_A*/
1473-
agr \PTR_B, \TEMP_VAL /*ptrbb+temp*C_B*/
1464+
sllg \TEMP_VAL, \TEMP_VAL,4 /*temp*2*/
14741465
.elseif \C_A==1
1475-
sllg \TEMP_VAL, \TEMP_VAL,3 /*temp*1*/
1476-
la \PTR_A,0(\PTR_A,\TEMP_VAL) /*ptrba+temp*C_A*/
1477-
sllg \TEMP_VAL, \TEMP_VAL,2 /*temp*2*2*/
1478-
agr \PTR_B, \TEMP_VAL /*ptrbb+temp*C_B*/
1479-
.endif
1480-
.elseif \C_B==2
1481-
.if \C_A==8
1482-
sllg \TEMP_VAL, \TEMP_VAL,4 /*temp*2*/
1483-
la \PTR_B,0(\PTR_B,\TEMP_VAL) /*ptrbb+temp*C_B*/
1484-
sllg \TEMP_VAL, \TEMP_VAL,2 /*temp*2*4 */
1485-
la \PTR_A,0(\PTR_A,\TEMP_VAL) /*ptrba+temp*C_A*/
1486-
.elseif \C_A==4
1487-
sllg \TEMP_VAL, \TEMP_VAL,4 /*temp*2*/
1488-
la \PTR_B,0(\PTR_B,\TEMP_VAL) /*ptrbb+temp*C_B*/
1489-
agr \TEMP_VAL, \TEMP_VAL
1490-
la \PTR_A,0(\PTR_A,\TEMP_VAL) /*ptrba+temp*C_A*/
1491-
.elseif \C_A==2
1492-
sllg \TEMP_VAL, \TEMP_VAL,4 /*temp*2*/
1493-
la \PTR_B,0(\PTR_B,\TEMP_VAL) /*ptrbb+temp*C_B*/
1494-
agr \PTR_A, \TEMP_VAL /*ptrba+temp*C_A*/
1495-
.elseif \C_A==1
1496-
sllg \TEMP_VAL, \TEMP_VAL,3 /*temp*1*/
1497-
la \PTR_B,0(\PTR_B,\TEMP_VAL) /*ptrbb+temp*C_B*/
1498-
agr \PTR_A, \TEMP_VAL /*ptrba+temp*C_A*/
1499-
la \PTR_B,0(\PTR_B,\TEMP_VAL) /*ptrbb+temp*C_B*/
1500-
.endif
1501-
.elseif \C_B==1
1502-
.if \C_A==8
1503-
sllg \TEMP_VAL, \TEMP_VAL,3 /*temp*1*/
1504-
la \PTR_B,0(\PTR_B,\TEMP_VAL) /*ptrbb+temp*C_B*/
1505-
sllg \TEMP_VAL, \TEMP_VAL,3 /*temp*1*8 */
1506-
la \PTR_A,0(\PTR_A,\TEMP_VAL) /*ptrba+temp*C_A*/
1507-
.elseif \C_A==4
1508-
sllg \TEMP_VAL, \TEMP_VAL,3 /*temp*1*/
1509-
la \PTR_B,0(\PTR_B,\TEMP_VAL) /*ptrbb+temp*C_B*/
1510-
sllg \TEMP_VAL, \TEMP_VAL,2 /*temp*1*4 */
1511-
la \PTR_A,0(\PTR_A,\TEMP_VAL) /*ptrba+temp*C_A*/
1512-
.elseif \C_A==2
1513-
sllg \TEMP_VAL, \TEMP_VAL,3 /*temp*1*/
1514-
la \PTR_B,0(\PTR_B,\TEMP_VAL) /*ptrbb+temp*C_B*/
1515-
agr \TEMP_VAL, \TEMP_VAL
1516-
la \PTR_A,0(\PTR_A,\TEMP_VAL) /*ptrba+temp*C_A*/
1517-
.elseif \C_A==1
1518-
sllg \TEMP_VAL, \TEMP_VAL,3 /*temp*1*/
1519-
la \PTR_B,0(\PTR_B,\TEMP_VAL) /*ptrbb+temp*C_B*/
1520-
agr \PTR_A, \TEMP_VAL /*ptrba+temp*C_A*/
1521-
.endif
1522-
.endif
1466+
sllg \TEMP_VAL, \TEMP_VAL,3 /*temp*1*/
1467+
.endif
1468+
la \PTR_A,0(\PTR_A,\TEMP_VAL) /*ptrba+temp*C_A*/
1469+
/*ptrbb does not need to be refreshed here, so it is left unchanged*/
1470+
15231471
#endif
15241472

15251473
#ifdef LEFT

0 commit comments

Comments
 (0)