Skip to content

Commit 9e4b697

Browse files
authored
Merge pull request #1043 from quickwritereader/z13
Z13
2 parents dd43661 + 7f2a959 commit 9e4b697

File tree

11 files changed

+3221
-5
lines changed

11 files changed

+3221
-5
lines changed

CONTRIBUTORS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,3 +150,7 @@ In chronological order:
150150
* theoractice <https://github.com/theoractice/>
151151
* [2016-03-20] Fix compiler error in VisualStudio with CMake
152152
* [2016-03-22] Fix access violation on Windows while static linking
153+
154+
* Abdelrauf <https://github.com/quickwritereader>
155+
* [2017-01-01] dgemm and dtrmm kernels for IBM z13
156+

Makefile.zarch

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11

22
ifeq ($(CORE), Z13)
3-
CCOMMON_OPT += -march=z13
4-
FCOMMON_OPT += -march=z13
3+
CCOMMON_OPT += -march=z13 -mzvector
4+
FCOMMON_OPT += -march=z13 -mzvector
55
endif
66

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,14 @@ Please read GotoBLAS_01Readme.txt
7777
- **ARMV8**: Experimental
7878
- **ARM Cortex-A57**: Experimental
7979

80+
#### IBM zEnterprise System:
81+
- **Z13**: Optimized Level 3 BLAS for double precision (DGEMM and DTRMM kernels)
82+
```
83+
git checkout z13
84+
make USE_TRMM=1
85+
```
86+
87+
8088
### Support OS:
8189
- **GNU/Linux**
8290
- **MingWin or Visual Studio(CMake)/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.

common_zarch.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,11 @@ static inline int blas_quickdivide(blasint x, blasint y){
103103

104104
#define PROLOGUE \
105105
.text ;\
106-
.align 4 ;\
106+
.align 256 ;\
107107
.global REALNAME ;\
108108
.type REALNAME, %function ;\
109109
REALNAME:
110+
110111

111112
#define EPILOGUE
112113

cpuid_zarch.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,9 @@ static char *cpuname_lower[] = {
4242

4343
int detect(void)
4444
{
45-
return CPU_GENERIC;
45+
// return CPU_GENERIC;
46+
return CPU_Z13;
47+
4648
}
4749

4850
void get_libname(void)

kernel/zarch/KERNEL.Z13

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
SAMAXKERNEL = ../arm/amax.c
2+
DAMAXKERNEL = ../arm/amax.c
3+
CAMAXKERNEL = ../arm/zamax.c
4+
ZAMAXKERNEL = ../arm/zamax.c
5+
6+
SAMINKERNEL = ../arm/amin.c
7+
DAMINKERNEL = ../arm/amin.c
8+
CAMINKERNEL = ../arm/zamin.c
9+
ZAMINKERNEL = ../arm/zamin.c
10+
11+
SMAXKERNEL = ../arm/max.c
12+
DMAXKERNEL = ../arm/max.c
13+
14+
SMINKERNEL = ../arm/min.c
15+
DMINKERNEL = ../arm/min.c
16+
17+
ISAMAXKERNEL = ../arm/iamax.c
18+
IDAMAXKERNEL = ../arm/iamax.c
19+
ICAMAXKERNEL = ../arm/izamax.c
20+
IZAMAXKERNEL = ../arm/izamax.c
21+
22+
ISAMINKERNEL = ../arm/iamin.c
23+
IDAMINKERNEL = ../arm/iamin.c
24+
ICAMINKERNEL = ../arm/izamin.c
25+
IZAMINKERNEL = ../arm/izamin.c
26+
27+
ISMAXKERNEL = ../arm/imax.c
28+
IDMAXKERNEL = ../arm/imax.c
29+
30+
ISMINKERNEL = ../arm/imin.c
31+
IDMINKERNEL = ../arm/imin.c
32+
33+
SASUMKERNEL = ../arm/asum.c
34+
DASUMKERNEL = ../arm/asum.c
35+
CASUMKERNEL = ../arm/zasum.c
36+
ZASUMKERNEL = ../arm/zasum.c
37+
38+
SAXPYKERNEL = ../arm/axpy.c
39+
DAXPYKERNEL = ../arm/axpy.c
40+
CAXPYKERNEL = ../arm/zaxpy.c
41+
ZAXPYKERNEL = ../arm/zaxpy.c
42+
43+
SCOPYKERNEL = ../arm/copy.c
44+
DCOPYKERNEL = ../arm/copy.c
45+
CCOPYKERNEL = ../arm/zcopy.c
46+
ZCOPYKERNEL = ../arm/zcopy.c
47+
48+
SDOTKERNEL = ../arm/dot.c
49+
DDOTKERNEL = ../arm/dot.c
50+
CDOTKERNEL = ../arm/zdot.c
51+
ZDOTKERNEL = ../arm/zdot.c
52+
53+
SNRM2KERNEL = ../arm/nrm2.c
54+
DNRM2KERNEL = ../arm/nrm2.c
55+
CNRM2KERNEL = ../arm/znrm2.c
56+
ZNRM2KERNEL = ../arm/znrm2.c
57+
58+
SROTKERNEL = ../arm/rot.c
59+
DROTKERNEL = ../arm/rot.c
60+
CROTKERNEL = ../arm/zrot.c
61+
ZROTKERNEL = ../arm/zrot.c
62+
63+
SSCALKERNEL = ../arm/scal.c
64+
DSCALKERNEL = ../arm/scal.c
65+
CSCALKERNEL = ../arm/zscal.c
66+
ZSCALKERNEL = ../arm/zscal.c
67+
68+
SSWAPKERNEL = ../arm/swap.c
69+
DSWAPKERNEL = ../arm/swap.c
70+
CSWAPKERNEL = ../arm/zswap.c
71+
ZSWAPKERNEL = ../arm/zswap.c
72+
73+
SGEMVNKERNEL = ../arm/gemv_n.c
74+
DGEMVNKERNEL = ../arm/gemv_n.c
75+
CGEMVNKERNEL = ../arm/zgemv_n.c
76+
ZGEMVNKERNEL = ../arm/zgemv_n.c
77+
78+
SGEMVTKERNEL = ../arm/gemv_t.c
79+
DGEMVTKERNEL = ../arm/gemv_t.c
80+
CGEMVTKERNEL = ../arm/zgemv_t.c
81+
ZGEMVTKERNEL = ../arm/zgemv_t.c
82+
83+
STRMMKERNEL = ../generic/trmmkernel_2x2.c
84+
DTRMMKERNEL = trmm8x4V.S
85+
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
86+
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
87+
88+
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
89+
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
90+
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
91+
SGEMMONCOPYOBJ = sgemm_oncopy.o
92+
SGEMMOTCOPYOBJ = sgemm_otcopy.o
93+
94+
95+
96+
DGEMMKERNEL = gemm8x4V.S
97+
DGEMMINCOPY = ../generic/gemm_ncopy_8.c
98+
DGEMMITCOPY = ../generic/gemm_tcopy_8.c
99+
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
100+
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
101+
DGEMMINCOPYOBJ = dgemm_incopy.o
102+
DGEMMITCOPYOBJ = dgemm_itcopy.o
103+
DGEMMONCOPYOBJ = dgemm_oncopy.o
104+
DGEMMOTCOPYOBJ = dgemm_otcopy.o
105+
106+
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
107+
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
108+
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
109+
CGEMMONCOPYOBJ = cgemm_oncopy.o
110+
CGEMMOTCOPYOBJ = cgemm_otcopy.o
111+
112+
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
113+
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
114+
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
115+
ZGEMMONCOPYOBJ = zgemm_oncopy.o
116+
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
117+
118+
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
119+
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
120+
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
121+
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
122+
123+
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
124+
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
125+
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
126+
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
127+
128+
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
129+
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
130+
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
131+
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
132+
133+
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
134+
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
135+
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
136+
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
137+
138+
139+
140+
141+

kernel/zarch/KERNEL.ZARCH_GENERIC

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,4 +131,3 @@ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
131131

132132

133133

134-

0 commit comments

Comments
 (0)