Skip to content

Commit 8d12027

Browse files
authored
Merge pull request #86 from xianyi/develop
rebase
2 parents eaf7f82 + b1e0bcc commit 8d12027

File tree

10 files changed

+636
-23
lines changed

10 files changed

+636
-23
lines changed

CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding proc
2929
else()
3030
set(NO_AFFINITY 1)
3131
endif()
32+
option(CPP_THREAD_SAFETY_TEST "Run a massively parallel DGEMM test to confirm thread safety of the library (requires OpenMP and about 1.3GB of RAM)" OFF)
33+
option(CPP_THREAD_SAFETY_GEMV "Run a massively parallel DGEMV test to confirm thread safety of the library (requires OpenMP)" OFF)
3234

3335
# Add a prefix or suffix to all exported symbol names in the shared library.
3436
# Avoids conflicts with other BLAS libraries, especially when using
@@ -234,6 +236,9 @@ if (NOT MSVC AND NOT NOFORTRAN)
234236
add_subdirectory(ctest)
235237
endif()
236238
add_subdirectory(lapack-netlib/TESTING)
239+
if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV)
240+
add_subdirectory(cpp_thread_test)
241+
endif()
237242
endif()
238243

239244
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES

Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,9 @@ ifneq ($(NO_CBLAS), 1)
146146
ifeq ($(CPP_THREAD_SAFETY_TEST), 1)
147147
$(MAKE) -C cpp_thread_test all
148148
endif
149+
ifeq ($(CPP_THREAD_SAFETY_GEMV), 1)
150+
$(MAKE) -C cpp_thread_test dgemv_tester
151+
endif
149152
endif
150153
endif
151154

Makefile.rule

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,9 @@ COMMON_PROF = -pg
272272
# work at all.
273273
#
274274
# CPP_THREAD_SAFETY_TEST = 1
275+
#
276+
# use this to run only the less memory-hungry GEMV test
277+
# CPP_THREAD_SAFETY_GEMV = 1
275278

276279

277280
# If you want to enable the experimental BFLOAT16 support

Makefile.system

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,6 @@ endif
295295
ifeq ($(C_COMPILER), GCC)
296296
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
297297
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
298-
GCCVERSIONEQ5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` = 5)
299298
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
300299
GCCVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 7)
301300
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
@@ -594,34 +593,36 @@ endif
594593
ifeq ($(ARCH), zarch)
595594
DYNAMIC_CORE = ZARCH_GENERIC
596595

597-
# Z13 is supported since gcc-5.2, gcc-6, and in RHEL 7.3 and newer
598-
ifeq ($(GCCVERSIONGT5), 1)
599-
ZARCH_SUPPORT_Z13 := 1
600-
else ifeq ($(GCCVERSIONEQ5), 1)
601-
ifeq ($(GCCMINORVERSIONGTEQ2), 1)
602-
ZARCH_SUPPORT_Z13 := 1
603-
endif
604-
endif
605-
606-
ifeq ($(wildcard /etc/redhat-release), /etc/redhat-release)
607-
ifeq ($(shell source /etc/os-release ; expr $$VERSION_ID \>= "7.3"), 1)
608-
ZARCH_SUPPORT_Z13 := 1
609-
endif
610-
endif
611-
612-
ifeq ($(ZARCH_SUPPORT_Z13), 1)
596+
# if the compiler accepts -march=arch11 or -march=z13 and can compile a file
597+
# with z13-specific inline assembly, then we can include support for Z13.
598+
# note: -march=z13 is equivalent to -march=arch11 yet some compiler releases
599+
# only support one or the other.
600+
# note: LLVM version 6.x supported -march=z13 yet could not handle vector
601+
# registers in inline assembly, so the check for supporting the -march flag is
602+
# not enough.
603+
ZARCH_TEST_COMPILE=-c $(TOPDIR)/kernel/zarch/damin_z13.c -I$(TOPDIR) -o /dev/null > /dev/null 2> /dev/null
604+
ZARCH_CC_SUPPORTS_ARCH11=$(shell $(CC) -march=arch11 $(ZARCH_TEST_COMPILE) && echo 1)
605+
ZARCH_CC_SUPPORTS_Z13=$(shell $(CC) -march=z13 $(ZARCH_TEST_COMPILE) && echo 1)
606+
607+
ifeq ($(or $(ZARCH_CC_SUPPORTS_ARCH11), $(ZARCH_CC_SUPPORTS_Z13)), 1)
613608
DYNAMIC_CORE += Z13
609+
CCOMMON_OPT += -DDYN_Z13
614610
else
615-
$(info OpenBLAS: Not building Z13 kernels because gcc is older than 5.2 or 6.x)
611+
$(info OpenBLAS: Not building Z13 kernels because the compiler $(CC) does not support it)
616612
endif
617613

618-
ifeq ($(GCCVERSIONGTEQ7), 1)
614+
# as above for z13, check for -march=arch12 and z14 support in the compiler.
615+
ZARCH_CC_SUPPORTS_ARCH12=$(shell $(CC) -march=arch12 $(ZARCH_TEST_COMPILE) && echo 1)
616+
ZARCH_CC_SUPPORTS_Z14=$(shell $(CC) -march=z14 $(ZARCH_TEST_COMPILE) && echo 1)
617+
ifeq ($(or $(ZARCH_CC_SUPPORTS_ARCH12), $(ZARCH_CC_SUPPORTS_Z14)), 1)
619618
DYNAMIC_CORE += Z14
619+
CCOMMON_OPT += -DDYN_Z14
620620
else
621-
$(info OpenBLAS: Not building Z14 kernels because gcc is older than 7.x)
622-
endif
621+
$(info OpenBLAS: Not building Z14 kernels because the compiler $(CC) does not support it)
623622
endif
624623

624+
endif # ARCH zarch
625+
625626
ifeq ($(ARCH), power)
626627
DYNAMIC_CORE = POWER6
627628
DYNAMIC_CORE += POWER8

cpp_thread_test/CMakeLists.txt

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# C++ thread-safety tests for the library.
#
# Nothing is built unless USE_OPENMP is set. When it is:
#   dgemm_thread_safety  is built when CPP_THREAD_SAFETY_TEST is ON
#   dgemv_thread_safety  is built when CPP_THREAD_SAFETY_TEST or
#                        CPP_THREAD_SAFETY_GEMV is ON
# Each executable is also registered as a CTest test of the same name.

include_directories(${PROJECT_SOURCE_DIR})
include_directories(${PROJECT_BINARY_DIR})

enable_language(CXX)

# The C++ test sources are compiled with the C flags plus the
# -DADD${BU} and -DCBLAS definitions.
set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -DADD${BU} -DCBLAS")

if (USE_OPENMP)
  if (CPP_THREAD_SAFETY_TEST)
    # Quoted: message() joins multiple unquoted arguments without any
    # separator, which would print "buildingthreadsafetytest".
    message(STATUS "building thread safety test")
    add_executable(dgemm_thread_safety dgemm_thread_safety.cpp)
    target_link_libraries(dgemm_thread_safety ${OpenBLAS_LIBNAME})
    # NAME/COMMAND form: the target name is replaced by the location of the
    # built executable, which also works with multi-config generators.
    add_test(NAME dgemm_thread_safety COMMAND dgemm_thread_safety)
  endif()

  if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV)
    add_executable(dgemv_thread_safety dgemv_thread_safety.cpp)
    target_link_libraries(dgemv_thread_safety ${OpenBLAS_LIBNAME})
    add_test(NAME dgemv_thread_safety COMMAND dgemv_thread_safety)
  endif()
endif()

kernel/power/KERNEL.POWER10

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,13 +142,13 @@ CASUMKERNEL = casum.c
142142
ZASUMKERNEL = zasum.c
143143
#
144144
SAXPYKERNEL = saxpy.c
145-
DAXPYKERNEL = daxpy.c
145+
DAXPYKERNEL = daxpy_power10.c
146146
ifneq ($(GCCVERSIONGTEQ9),1)
147147
CAXPYKERNEL = caxpy_power9.S
148148
else
149149
CAXPYKERNEL = caxpy.c
150150
endif
151-
ZAXPYKERNEL = zaxpy.c
151+
ZAXPYKERNEL = zaxpy_power10.c
152152
#
153153
SCOPYKERNEL = scopy.c
154154
DCOPYKERNEL = dcopy.c

kernel/power/daxpy_microk_power10.c

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
/***************************************************************************
2+
Copyright (c) 2020, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*****************************************************************************/
27+
28+
#define HAVE_KERNEL_8 1
29+
30+
/*
 * POWER10 DAXPY micro-kernel: y[i] += alpha * x[i].
 *
 * Each pass handles 16 doubles (128 bytes): the counter in %1 is decremented
 * by 16 and both pointers advance by 128. One full block of x and y is loaded
 * and stored before the counter is ever tested, so n is assumed to be a
 * positive multiple of 16.
 *
 * Operands:
 *   %0  *y     ("+m", referenced only in the trailing assembler comment)
 *   %1  n      remaining element count
 *   %2  x      input pointer, advanced by the asm
 *   %3  y      in/out pointer, advanced by the asm
 *   %4  t0     VSX register that receives alpha via XXSPLTD_S
 *   %5  *x     ("m", referenced only in the trailing assembler comment)
 *   %6  alpha  scalar multiplier
 *
 * XXSPLTD_S is defined outside this file; presumably it splats alpha into
 * both lanes of t0 - confirm against its definition.
 * vs32-vs47 are used as scratch and are listed as clobbers together with cr0.
 */
static void daxpy_kernel_8 (long n, double *x, double *y, double alpha)
31+
{
32+
__vector double t0;
33+
34+
__asm__
35+
(
36+
XXSPLTD_S(%x4,%x6,0)
37+
38+
/* cache-touch hints for the start of x and y */
"dcbt 0, %2 \n\t"
39+
"dcbt 0, %3 \n\t"
40+
41+
/* preload the first x block into vs32-35 and vs40-43 */
"lxvp 32, 0(%2) \n\t"
42+
"lxvp 34, 32(%2) \n\t"
43+
"lxvp 40, 64(%2) \n\t"
44+
"lxvp 42, 96(%2) \n\t"
45+
46+
/* preload the first y block into vs36-39 and vs44-47 */
"lxvp 36, 0(%3) \n\t"
47+
"lxvp 38, 32(%3) \n\t"
48+
"lxvp 44, 64(%3) \n\t"
49+
"lxvp 46, 96(%3) \n\t"
50+
51+
/* x already points at the next block; y is advanced inside the loop */
"addi %2, %2, 128 \n\t"
52+
53+
"addic. %1, %1, -16 \n\t"
54+
"ble two%= \n\t"
55+
56+
".align 5 \n"
57+
/* main loop: multiply-add and store the current block while the loads
   for the next block of x and y are interleaved */
"one%=: \n\t"
58+
59+
"xvmaddadp 36, 32, %x4 \n\t"
60+
"xvmaddadp 37, 33, %x4 \n\t"
61+
62+
"lxvp 32, 0(%2) \n\t"
63+
"stxvp 36, 0(%3) \n\t"
64+
65+
"xvmaddadp 38, 34, %x4 \n\t"
66+
"xvmaddadp 39, 35, %x4 \n\t"
67+
68+
"lxvp 34, 32(%2) \n\t"
69+
"stxvp 38, 32(%3) \n\t"
70+
71+
72+
/* next block's first half of y (y has not been advanced yet) */
"lxvp 36, 128(%3) \n\t"
73+
"lxvp 38, 160(%3) \n\t"
74+
75+
"xvmaddadp 44, 40, %x4 \n\t"
76+
"xvmaddadp 45, 41, %x4 \n\t"
77+
78+
"lxvp 40, 64(%2) \n\t"
79+
"stxvp 44, 64(%3) \n\t"
80+
81+
"xvmaddadp 46, 42, %x4 \n\t"
82+
"xvmaddadp 47, 43, %x4 \n\t"
83+
84+
"lxvp 42, 96(%2) \n\t"
85+
"stxvp 46, 96(%3) \n\t"
86+
87+
"addi %2, %2, 128 \n\t"
88+
"addi %3, %3, 128 \n\t"
89+
90+
/* next block's second half of y (y now points at the next block) */
"lxvp 44, 64(%3) \n\t"
91+
"lxvp 46, 96(%3) \n\t"
92+
93+
"addic. %1, %1, -16 \n\t"
94+
"bgt one%= \n"
95+
96+
/* tail: finish and store the last block, which is already in registers */
"two%=: \n\t"
97+
98+
"xvmaddadp 36, 32, %x4 \n\t"
99+
"xvmaddadp 37, 33, %x4 \n\t"
100+
"xvmaddadp 38, 34, %x4 \n\t"
101+
"xvmaddadp 39, 35, %x4 \n\t"
102+
103+
"xvmaddadp 44, 40, %x4 \n\t"
104+
"xvmaddadp 45, 41, %x4 \n\t"
105+
"xvmaddadp 46, 42, %x4 \n\t"
106+
"xvmaddadp 47, 43, %x4 \n\t"
107+
108+
"stxvp 36, 0(%3) \n\t"
109+
"stxvp 38, 32(%3) \n\t"
110+
"stxvp 44, 64(%3) \n\t"
111+
"stxvp 46, 96(%3) \n\t"
112+
113+
"#n=%1 x=%5=%2 y=%0=%3 alpha=%6 t0=%x4\n"
114+
:
115+
"+m" (*y),
116+
"+r" (n), // 1
117+
"+b" (x), // 2
118+
"+b" (y), // 3
119+
"=wa" (t0) // 4
120+
:
121+
"m" (*x),
122+
"d" (alpha) // 6
123+
:
124+
"cr0",
125+
"vs32","vs33","vs34","vs35","vs36","vs37", "vs38", "vs39",
126+
"vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47"
127+
);
128+
129+
}
130+
131+

kernel/power/daxpy_power10.c

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
/***************************************************************************
2+
Copyright (c) 2020, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*****************************************************************************/
27+
28+
#include "common.h"
29+
30+
#if defined(__VEC__) || defined(__ALTIVEC__)
31+
#include "daxpy_microk_power10.c"
32+
#endif
33+
34+
35+
#ifndef HAVE_KERNEL_8
36+
37+
/*
 * Portable C version of daxpy_kernel_8, compiled only when no micro-kernel
 * has defined HAVE_KERNEL_8.
 *
 * Performs y[k] += alpha * x[k] in groups of eight elements until the group
 * start index reaches n. A full group of eight is always processed, so n is
 * expected to be a multiple of 8.
 */
static void daxpy_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT alpha)
{
	BLASLONG base;
	BLASLONG lane;

	for (base = 0; base < n; base += 8)
	{
		/* one group: elements base .. base+7, in ascending order */
		for (lane = 0; lane < 8; lane++)
		{
			y[base + lane] += alpha * x[base + lane];
		}
	}
}
56+
57+
#endif
58+
59+
/*
 * DAXPY entry point: y := y + da * x over n elements.
 *
 *   n             number of elements; nothing is done when n <= 0
 *   da            scalar multiplier
 *   x, inc_x      input vector and its stride
 *   y, inc_y      vector updated in place and its stride
 *   dummy0, dummy1, dummy, dummy2   not used by this routine
 *
 * Always returns 0.
 */
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
{
	BLASLONG done;
	BLASLONG blocked;
	BLASLONG xpos = 0;
	BLASLONG ypos = 0;

	if (n <= 0)
		return 0;

	if (inc_x == 1 && inc_y == 1)
	{
		/* Unit stride: the largest multiple of 16 goes to the kernel,
		   the remaining elements are handled one at a time. */
		blocked = n & -16;

		if (blocked)
			daxpy_kernel_8(blocked, x, y, da);

		for (done = blocked; done < n; done++)
		{
			y[done] += da * x[done];
		}

		return 0;
	}

	/* Strided case: four elements per pass. All four products are formed
	   before any element of y is written. */
	blocked = n & -4;

	for (done = 0; done < blocked; done += 4)
	{
		FLOAT p0 = da * x[xpos];
		FLOAT p1 = da * x[xpos + inc_x];
		FLOAT p2 = da * x[xpos + 2 * inc_x];
		FLOAT p3 = da * x[xpos + 3 * inc_x];

		y[ypos] += p0;
		y[ypos + inc_y] += p1;
		y[ypos + 2 * inc_y] += p2;
		y[ypos + 3 * inc_y] += p3;

		xpos += inc_x * 4;
		ypos += inc_y * 4;
	}

	/* Up to three leftover elements. */
	for (; done < n; done++)
	{
		y[ypos] += da * x[xpos];
		xpos += inc_x;
		ypos += inc_y;
	}

	return 0;
}
120+
121+

0 commit comments

Comments
 (0)