Skip to content

Commit 2931feb

Browse files
authored
Merge pull request #58 from xianyi/develop
rebase
2 parents 9472dd9 + 20245de commit 2931feb

File tree

12 files changed

+675
-78
lines changed

12 files changed

+675
-78
lines changed

CONTRIBUTORS.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,4 +183,7 @@ In chronological order:
183183

184184
* Rajalakshmi Srinivasaraghavan <https://github.com/RajalakshmiSR>
185185
* [2020-04-15] Half-precision GEMM for bfloat16
186-
186+
187+
* Marius Hillenbrand <https://github.com/mhillenibm>
188+
* [2020-05-12] Revise dynamic architecture detection for IBM z
189+
* [2020-05-12] Add new sgemm and strmm kernel for IBM z14

Makefile.system

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -563,8 +563,27 @@ DYNAMIC_CORE += EMAG8180
563563
endif
564564

565565
ifeq ($(ARCH), zarch)
566-
DYNAMIC_CORE = Z13
566+
DYNAMIC_CORE = ZARCH_GENERIC
567+
568+
# Z13 is supported since gcc-5.2, gcc-6, and in RHEL 7.3 and newer
569+
# Compare major and minor version numerically. A plain `expr A \>= "5.2"` falls
# back to string comparison because "5.2" is not an integer, which misorders
# two-digit majors (a -dumpversion output of "10" sorts before "5.2").
# The result is "1" when gcc is 5.2 or newer and empty otherwise.
GCC_GE_52 := $(subst 0,,$(shell $(CC) -dumpversion | awk -F. '{ ok = ($$1 + 0 > 5) || ($$1 + 0 == 5 && $$2 + 0 >= 2); print ok }'))
570+
571+
ifeq ($(wildcard /etc/redhat-release), /etc/redhat-release)
572+
RHEL_WITH_Z13 := $(subst 0,,$(shell source /etc/os-release ; expr $$VERSION_ID \>= "7.3"))
573+
endif
574+
575+
ifeq ($(or $(GCC_GE_52),$(RHEL_WITH_Z13)), 1)
576+
DYNAMIC_CORE += Z13
577+
else
578+
$(info OpenBLAS: Not building Z13 kernels because gcc is older than 5.2 or 6.x)
579+
endif
580+
581+
GCC_MAJOR_GE_7 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 7)
582+
ifeq ($(GCC_MAJOR_GE_7), 1)
567583
DYNAMIC_CORE += Z14
584+
else
585+
$(info OpenBLAS: Not building Z14 kernels because gcc is older than 7.x)
586+
endif
568587
endif
569588

570589
ifeq ($(ARCH), power)
@@ -855,7 +874,7 @@ ifneq ($(INTERFACE64), 0)
855874
FCOMMON_OPT += -i8
856875
endif
857876
endif
858-
FCOMMON_OPT += -recursive
877+
FCOMMON_OPT += -recursive -fp-model strict -assume protect-parens
859878
ifeq ($(USE_OPENMP), 1)
860879
FCOMMON_OPT += -fopenmp
861880
endif

Makefile.zarch

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,6 @@ FCOMMON_OPT += -march=z13 -mzvector
55
endif
66

77
ifeq ($(CORE), Z14)
8-
CCOMMON_OPT += -march=z14 -mzvector
8+
CCOMMON_OPT += -march=z14 -mzvector -O3
99
FCOMMON_OPT += -march=z14 -mzvector
1010
endif

benchmark/Makefile

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,12 @@ else
4949
GOTO_LAPACK_TARGETS=
5050
endif
5151

52+
ifeq ($(BUILD_HALF),1)
53+
GOTO_HALF_TARGETS=shgemm.goto
54+
else
55+
GOTO_HALF_TARGETS=
56+
endif
57+
5258
ifeq ($(OSNAME), WINNT)
5359

5460
goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
@@ -91,7 +97,7 @@ goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
9197
sgetri.goto dgetri.goto cgetri.goto zgetri.goto \
9298
spotrf.goto dpotrf.goto cpotrf.goto zpotrf.goto \
9399
ssymm.goto dsymm.goto csymm.goto zsymm.goto \
94-
saxpby.goto daxpby.goto caxpby.goto zaxpby.goto
100+
saxpby.goto daxpby.goto caxpby.goto zaxpby.goto $(GOTO_HALF_TARGETS)
95101

96102
acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
97103
scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \
@@ -264,7 +270,7 @@ goto :: sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
264270
samin.goto damin.goto camin.goto zamin.goto \
265271
smin.goto dmin.goto \
266272
saxpby.goto daxpby.goto caxpby.goto zaxpby.goto \
267-
snrm2.goto dnrm2.goto scnrm2.goto dznrm2.goto $(GOTO_LAPACK_TARGETS)
273+
snrm2.goto dnrm2.goto scnrm2.goto dznrm2.goto $(GOTO_LAPACK_TARGETS) $(GOTO_HALF_TARGETS)
268274

269275
acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
270276
scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \
@@ -614,6 +620,11 @@ zcholesky.essl : zcholesky.$(SUFFIX)
614620
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
615621

616622
##################################### Sgemm ####################################################
623+
ifeq ($(BUILD_HALF),1)
624+
shgemm.goto : shgemm.$(SUFFIX) ../$(LIBNAME)
625+
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
626+
endif
627+
617628
sgemm.goto : sgemm.$(SUFFIX) ../$(LIBNAME)
618629
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
619630

@@ -2916,6 +2927,11 @@ ccholesky.$(SUFFIX) : cholesky.c
29162927
zcholesky.$(SUFFIX) : cholesky.c
29172928
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
29182929

2930+
ifeq ($(BUILD_HALF),1)
2931+
shgemm.$(SUFFIX) : gemm.c
2932+
$(CC) $(CFLAGS) -c -DHALF -UCOMPLEX -UDOUBLE -o $(@F) $^
2933+
endif
2934+
29192935
sgemm.$(SUFFIX) : gemm.c
29202936
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
29212937

benchmark/gemm.c

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3939

4040
#ifdef DOUBLE
4141
#define GEMM BLASFUNC(dgemm)
42+
#elif defined(HALF)
43+
#define GEMM BLASFUNC(shgemm)
4244
#else
4345
#define GEMM BLASFUNC(sgemm)
4446
#endif
@@ -120,7 +122,8 @@ static void *huge_malloc(BLASLONG size){
120122

121123
int main(int argc, char *argv[]){
122124

123-
FLOAT *a, *b, *c;
125+
IFLOAT *a, *b;
126+
FLOAT *c;
124127
FLOAT alpha[] = {1.0, 0.0};
125128
FLOAT beta [] = {0.0, 0.0};
126129
char transa = 'N';
@@ -184,10 +187,10 @@ int main(int argc, char *argv[]){
184187
k = to;
185188
}
186189

187-
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * m * k * COMPSIZE)) == NULL) {
190+
if (( a = (IFLOAT *)malloc(sizeof(IFLOAT) * m * k * COMPSIZE)) == NULL) {
188191
fprintf(stderr,"Out of Memory!!\n");exit(1);
189192
}
190-
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * k * n * COMPSIZE)) == NULL) {
193+
if (( b = (IFLOAT *)malloc(sizeof(IFLOAT) * k * n * COMPSIZE)) == NULL) {
191194
fprintf(stderr,"Out of Memory!!\n");exit(1);
192195
}
193196
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * m * n * COMPSIZE)) == NULL) {
@@ -199,10 +202,10 @@ int main(int argc, char *argv[]){
199202
#endif
200203

201204
for (i = 0; i < m * k * COMPSIZE; i++) {
202-
a[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
205+
a[i] = ((IFLOAT) rand() / (IFLOAT) RAND_MAX) - 0.5;
203206
}
204207
for (i = 0; i < k * n * COMPSIZE; i++) {
205-
b[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
208+
b[i] = ((IFLOAT) rand() / (IFLOAT) RAND_MAX) - 0.5;
206209
}
207210
for (i = 0; i < m * n * COMPSIZE; i++) {
208211
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;

cmake/utils.cmake

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ macro(ParseMakefileVars MAKEFILE_IN)
4343
if (NOT "${line_match}" STREQUAL "")
4444
#message(STATUS "match on ${line_match}")
4545
set(var_name ${CMAKE_MATCH_1})
46-
set(var_value ${CMAKE_MATCH_2})
46+
# set(var_value ${CMAKE_MATCH_2})
47+
string(STRIP ${CMAKE_MATCH_2} var_value)
4748
# check for Makefile variables in the string, e.g. $(TSUFFIX)
4849
string(REGEX MATCHALL "\\$\\(([0-9_a-zA-Z]+)\\)" make_var_matches ${var_value})
4950
foreach (make_var ${make_var_matches})
@@ -63,7 +64,7 @@ macro(ParseMakefileVars MAKEFILE_IN)
6364
string(REGEX MATCH "ifeq \\(\\$\\(([_A-Z]+)\\),[ \t]*([0-9_A-Z]+)\\)" line_match "${makefile_line}")
6465
if (NOT "${line_match}" STREQUAL "")
6566
# message(STATUS "IFEQ: ${line_match} first: ${CMAKE_MATCH_1} second: ${CMAKE_MATCH_2}")
66-
if (${${CMAKE_MATCH_1}} STREQUAL ${CMAKE_MATCH_2})
67+
# DEFINED takes the variable *name* (CMAKE_MATCH_1 holds the name captured from
# the ifeq line), not its dereferenced value. The compared operands are quoted
# so that a defined-but-empty variable still yields a well-formed condition.
if (DEFINED ${CMAKE_MATCH_1} AND "${${CMAKE_MATCH_1}}" STREQUAL "${CMAKE_MATCH_2}")
6768
# message (STATUS "condition is true")
6869
set (IfElse 1)
6970
else ()

driver/others/dynamic_zarch.c

Lines changed: 90 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,58 @@
1-
21
#include "common.h"
2+
#include <stdbool.h>
3+
4+
// Gate kernels for z13 and z14 on gcc version
5+
#if (__GNUC__ == 5 && __GNUC_MINOR__ >= 2) || __GNUC__ >= 6 || \
6+
/* RHEL 7 since 7.3: */ \
7+
(__GNUC__ == 4 && __GNUC_MINOR__ == 8 && __GNUC_PATCHLEVEL__ == 5 && \
8+
__GNUC_RH_RELEASE__ >= 11)
9+
#define HAVE_Z13_SUPPORT
10+
#endif
11+
12+
#if __GNUC__ >= 7
13+
#define HAVE_Z14_SUPPORT
14+
#endif
15+
16+
// Guard the use of getauxval(): it is only used with glibc version >= 2.16.
#ifdef __GLIBC__
#include <features.h>
#if __GLIBC_PREREQ(2, 16)
#include <sys/auxv.h>
#define HAVE_GETAUXVAL 1
#endif // __GLIBC_PREREQ(2, 16)
#endif // __GLIBC__

// Capability bits that are tested against the AT_HWCAP value further down in
// this file. They normally come in through <sys/auxv.h>; provide them here
// (with the values used by glibc's s390 <bits/hwcap.h>) so that the file still
// compiles when that header is not included or does not define these bits.
#ifndef HWCAP_S390_VX
#define HWCAP_S390_VX 2048
#endif
#ifndef HWCAP_S390_VXE
#define HWCAP_S390_VXE 8192
#endif

#ifdef HAVE_GETAUXVAL

/**
 * Return the hardware capability bits (auxiliary value AT_HWCAP), restricted
 * by the mask in the environment variable LD_HWCAP_MASK if that is set.
 *
 * The mask is parsed with strtoul() using base 0, so decimal, octal ("0...")
 * and hexadecimal ("0x...") notations are accepted.
 *
 * A missing auxiliary value is interpreted as no capabilities available,
 * which is safe.
 */
static unsigned long get_hwcap(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);

	// honor requests for not using specific CPU features in LD_HWCAP_MASK
	const char *maskenv = getenv("LD_HWCAP_MASK");
	if (maskenv)
		hwcap &= strtoul(maskenv, NULL, 0);

	return hwcap;
}

#else // HAVE_GETAUXVAL
#warning "Cannot detect SIMD support in Z13 or newer architectures since getauxval() is not available (requires glibc 2.16 or newer)"

/**
 * Fallback used when getauxval() is not available (glibc older than 2.16 or a
 * different C library): report no capabilities at all, which is safe.
 */
static unsigned long get_hwcap(void)
{
	return 0;
}

#endif // HAVE_GETAUXVAL
48+
49+
extern gotoblas_t gotoblas_ZARCH_GENERIC;
50+
#ifdef HAVE_Z13_SUPPORT
451
extern gotoblas_t gotoblas_Z13;
52+
#endif
53+
#ifdef HAVE_Z14_SUPPORT
554
extern gotoblas_t gotoblas_Z14;
6-
//extern gotoblas_t gotoblas_Z15;
7-
//#if (!defined C_GCC) || (GCC_VERSION >= 60000)
8-
//extern gotoblas_t gotoblas_Z14;
9-
//#endif
55+
#endif
1056

1157
#define NUM_CORETYPES 4
1258

@@ -16,47 +62,50 @@ static char* corename[] = {
1662
"unknown",
1763
"Z13",
1864
"Z14",
19-
// "Z15",
2065
"ZARCH_GENERIC",
2166
};
2267

2368
char* gotoblas_corename(void) {
69+
#ifdef HAVE_Z13_SUPPORT
2470
if (gotoblas == &gotoblas_Z13) return corename[1];
71+
#endif
72+
#ifdef HAVE_Z14_SUPPORT
2573
if (gotoblas == &gotoblas_Z14) return corename[2];
26-
// if (gotoblas == &gotoblas_Z15) return corename[3];
27-
//#if (!defined C_GCC) || (GCC_VERSION >= 60000)
28-
// if (gotoblas == &gotoblas_POWER9) return corename[3];
29-
//#endif
30-
return corename[0]; // try generic?
74+
#endif
75+
if (gotoblas == &gotoblas_ZARCH_GENERIC) return corename[3];
76+
77+
return corename[0];
3178
}
3279

33-
// __builtin_cpu_is is not supported by zarch
80+
/**
81+
* Detect the fitting set of kernels by retrieving the CPU features supported by
82+
* OS from the auxiliary value AT_HWCAP and choosing the set of kernels
83+
* ("coretype") that exploits most of the features and can be compiled with the
84+
* available gcc version.
85+
* Note that we cannot use vector registers on a z13 or newer unless supported
86+
* by the OS kernel (which needs to handle them properly during context switch).
87+
*/
3488
static gotoblas_t* get_coretype(void) {
35-
FILE* infile;
36-
char buffer[512], * p;
37-
38-
p = (char*)NULL;
39-
infile = fopen("/proc/sysinfo", "r");
40-
while (fgets(buffer, sizeof(buffer), infile)) {
41-
if (!strncmp("Type", buffer, 4)) {
42-
p = strchr(buffer, ':') + 2;
43-
#if 0
44-
fprintf(stderr, "%s\n", p);
45-
#endif
46-
break;
47-
}
48-
}
4989

50-
fclose(infile);
90+
unsigned long hwcap __attribute__((unused)) = get_hwcap();
5191

52-
if (strstr(p, "2964")) return &gotoblas_Z13;
53-
if (strstr(p, "2965")) return &gotoblas_Z13;
54-
if (strstr(p, "3906")) return &gotoblas_Z14;
55-
if (strstr(p, "3907")) return &gotoblas_Z14;
56-
if (strstr(p, "8561")) return &gotoblas_Z14; // fallback z15 to z14
57-
if (strstr(p, "8562")) return &gotoblas_Z14; // fallback z15 to z14
92+
// z14 and z15 systems: exploit Vector Facility (SIMD) and
93+
// Vector-Enhancements Facility 1 (float SIMD instructions), if present.
94+
#ifdef HAVE_Z14_SUPPORT
95+
if ((hwcap & HWCAP_S390_VX) && (hwcap & HWCAP_S390_VXE))
96+
return &gotoblas_Z14;
97+
#endif
98+
99+
// z13: Vector Facility (SIMD for double)
100+
#ifdef HAVE_Z13_SUPPORT
101+
if (hwcap & HWCAP_S390_VX)
102+
return &gotoblas_Z13;
103+
#endif
58104

59-
return NULL; // should be ZARCH_GENERIC
105+
// fallback in case of missing compiler support, systems before z13, or
106+
// when the OS does not advertise support for the Vector Facility (e.g.,
107+
// missing support in the OS kernel)
108+
return &gotoblas_ZARCH_GENERIC;
60109
}
61110

62111
static gotoblas_t* force_coretype(char* coretype) {
@@ -76,12 +125,13 @@ static gotoblas_t* force_coretype(char* coretype) {
76125

77126
switch (found)
78127
{
128+
#ifdef HAVE_Z13_SUPPORT
79129
case 1: return (&gotoblas_Z13);
130+
#endif
131+
#ifdef HAVE_Z14_SUPPORT
80132
case 2: return (&gotoblas_Z14);
81-
// case 3: return (&gotoblas_Z15);
82-
//#if (!defined C_GCC) || (GCC_VERSION >= 60000)
83-
// case 3: return (&gotoblas_POWER9);
84-
//#endif
133+
#endif
134+
case 3: return (&gotoblas_ZARCH_GENERIC);
85135
default: return NULL;
86136
}
87137
snprintf(message, 128, "Core not found: %s\n", coretype);
@@ -109,9 +159,9 @@ void gotoblas_dynamic_init(void) {
109159

110160
if (gotoblas == NULL)
111161
{
112-
snprintf(coremsg, 128, "Falling back to Z14 core\n");
162+
snprintf(coremsg, 128, "Failed to detect system, falling back to generic z support.\n");
113163
openblas_warning(1, coremsg);
114-
gotoblas = &gotoblas_Z14;
164+
gotoblas = &gotoblas_ZARCH_GENERIC;
115165
}
116166

117167
if (gotoblas && gotoblas->init) {

kernel/x86_64/KERNEL.SKYLAKEX

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,6 @@ DGEMM_BETA = dgemm_beta_skylakex.c
2424

2525
CGEMMKERNEL = cgemm_kernel_8x2_skylakex.c
2626
ZGEMMKERNEL = zgemm_kernel_4x2_skylakex.c
27+
28+
CSCALKERNEL = ../arm/zscal.c
29+
ZSCALKERNEL = ../arm/zscal.c

kernel/zarch/KERNEL.Z14

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -86,23 +86,23 @@ DGEMVTKERNEL = dgemv_t_4.c
8686
CGEMVTKERNEL = cgemv_t_4.c
8787
ZGEMVTKERNEL = zgemv_t_4.c
8888

89-
STRMMKERNEL = strmm8x4V.S
89+
STRMMKERNEL = gemm_vec.c
9090
DTRMMKERNEL = trmm8x4V.S
9191
CTRMMKERNEL = ctrmm4x4V.S
9292
ZTRMMKERNEL = ztrmm4x4V.S
9393

94-
SGEMMKERNEL = strmm8x4V.S
95-
SGEMMINCOPY = ../generic/gemm_ncopy_8.c
96-
SGEMMITCOPY = ../generic/gemm_tcopy_8.c
97-
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
98-
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
94+
SGEMMKERNEL = gemm_vec.c
95+
ifneq ($(SGEMM_UNROLL_M),$(SGEMM_UNROLL_N))
96+
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
97+
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
9998
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
10099
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
100+
endif
101+
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
102+
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
101103
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
102104
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
103105

104-
105-
106106
DGEMMKERNEL = gemm8x4V.S
107107
DGEMMINCOPY = ../generic/gemm_ncopy_8.c
108108
DGEMMITCOPY = ../generic/gemm_tcopy_8.c
@@ -145,7 +145,3 @@ ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
145145
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
146146
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
147147

148-
149-
150-
151-

0 commit comments

Comments
 (0)