Skip to content

Commit d8e2edf

Browse files
authored
Merge pull request #71 from xianyi/develop
rebase
2 parents a83680b + 419b868 commit d8e2edf

File tree

15 files changed

+513
-46
lines changed

15 files changed

+513
-46
lines changed

.github/workflows/dynamic_arch.yml

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ jobs:
99
fail-fast: false
1010
matrix:
1111
os: [ubuntu-latest, macos-latest]
12+
fortran: [gfortran, flang]
1213
build: [cmake, make]
1314
steps:
1415
- name: Checkout repository
@@ -24,7 +25,7 @@ jobs:
2425
# Restore any ccache cache entry, if none for
2526
# ${{ runner.os }}-ccache-${{ github.sha }} exists
2627
restore-keys: |
27-
${{ runner.os }}-ccache
28+
${{ runner.os }}-ccache-
2829
2930
- name: Print system information
3031
run: |
@@ -49,8 +50,8 @@ jobs:
4950
fi
5051
ccache -M 300M # Limit the ccache size; Github's overall cache limit is 5GB
5152
52-
- name: Build
53-
if: matrix.build == 'make'
53+
- name: gfortran build
54+
if: matrix.build == 'make' && matrix.fortran == 'gfortran'
5455
run: |
5556
if [ "$RUNNER_OS" == "Linux" ]; then
5657
export PATH="/usr/lib/ccache:${PATH}"
@@ -63,8 +64,29 @@ jobs:
6364
6465
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0
6566
66-
- name: CMake build
67-
if: matrix.build == 'cmake'
67+
- name: flang build
68+
if: matrix.build == 'make' && matrix.fortran == 'flang'
69+
run: |
70+
if [ "$RUNNER_OS" == "Linux" ]; then
71+
export PATH="/usr/lib/ccache:${PATH}"
72+
elif [ "$RUNNER_OS" == "macOS" ]; then
73+
exit 0
74+
else
75+
echo "$RUNNER_OS not supported"
76+
exit 1
77+
fi
78+
79+
cd /usr/
80+
sudo wget -nv https://github.com/flang-compiler/flang/releases/download/flang_20190329/flang-20190329-x86-70.tgz
81+
sudo tar xf flang-20190329-x86-70.tgz
82+
sudo rm flang-20190329-x86-70.tgz
83+
cd -
84+
85+
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC=flang
86+
87+
88+
- name: CMake gfortran build
89+
if: matrix.build == 'cmake' && matrix.fortran == 'gfortran'
6890
run: |
6991
if [ "$RUNNER_OS" == "Linux" ]; then
7092
export PATH="/usr/lib/ccache:${PATH}"

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
66
project(OpenBLAS C ASM)
77
set(OpenBLAS_MAJOR_VERSION 0)
88
set(OpenBLAS_MINOR_VERSION 3)
9-
set(OpenBLAS_PATCH_VERSION 10.dev)
10-
9+
set(OpenBLAS_PATCH_VERSION 9.dev)
1110
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
1211

1312
# Adhere to GNU filesystem layout conventions
@@ -234,6 +233,7 @@ if (NOT MSVC AND NOT NOFORTRAN)
234233
if(NOT NO_CBLAS)
235234
add_subdirectory(ctest)
236235
endif()
236+
add_subdirectory(lapack-netlib/TESTING)
237237
endif()
238238

239239
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES

Makefile.power

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,11 @@ endif
1111

1212
ifeq ($(CORE), POWER10)
1313
ifeq ($(USE_OPENMP), 1)
14-
COMMON_OPT += -Ofast -mcpu=future -mtune=future -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
15-
FCOMMON_OPT += -O2 -frecursive -mcpu=future -mtune=future -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
14+
COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
15+
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
1616
else
17-
COMMON_OPT += -Ofast -mcpu=future -mtune=future -mvsx -malign-power -fno-fast-math
18-
FCOMMON_OPT += -O2 -frecursive -mcpu=future -mtune=future -malign-power -fno-fast-math
17+
COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -malign-power -fno-fast-math
18+
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -malign-power -fno-fast-math
1919
endif
2020
endif
2121

@@ -34,8 +34,11 @@ ifeq ($(USE_OPENMP), 1)
3434
COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
3535
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
3636
else
37-
COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -fno-fast-math
38-
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -fno-fast-math
37+
COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -fno-fast-math
38+
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -fno-fast-math
39+
ifeq ($(OSNAME), AIX)
40+
FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -malign-power -fno-fast-math
41+
endif
3942
endif
4043
endif
4144

@@ -78,6 +81,9 @@ CCOMMON_OPT += -mpowerpc64 -maix64
7881
ifeq ($(COMPILER_F77), g77)
7982
FCOMMON_OPT += -mpowerpc64 -maix64
8083
endif
84+
ifeq ($(F_COMPILER), GFORTRAN)
85+
FCOMMON_OPT += -mpowerpc64 -maix64
86+
endif
8187
ifeq ($(COMPILER_F77), xlf)
8288
FCOMMON_OPT += -q64
8389
endif

Makefile.system

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,9 @@ endif
109109
ifeq ($(TARGET), ARMV8)
110110
GETARCH_FLAGS := -DFORCE_ARMV7
111111
endif
112+
ifeq ($(TARGET), POWER8)
113+
GETARCH_FLAGS := -DFORCE_POWER6
114+
endif
112115
endif
113116

114117

@@ -286,8 +289,17 @@ GCCVERSIONEQ5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` = 5)
286289
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
287290
GCCVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 7)
288291
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
289-
GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 2)
290-
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
292+
GCCVERSIONGTEQ11 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 11)
293+
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10)
294+
# Note that the behavior of -dumpversion is compile-time-configurable for
295+
# gcc-7.x and newer. Use -dumpfullversion there
296+
ifeq ($(GCCVERSIONGTEQ7),1)
297+
GCCDUMPVERSION_PARAM := -dumpfullversion
298+
else
299+
GCCDUMPVERSION_PARAM := -dumpversion
300+
endif
301+
GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 2)
302+
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 7)
291303
endif
292304

293305
#
@@ -612,9 +624,12 @@ DYNAMIC_CORE += POWER9
612624
else
613625
# Tell the user why POWER9 is left out of DYNAMIC_CORE. The function name must be
# followed by whitespace: with "$(info, ...)" make does not see a function call and
# expands an undefined variable instead, so nothing was ever printed.
$(info OpenBLAS: Your gcc version is too old to build the POWER9 kernels.)
614626
endif
615-
GCCVERSIONGTEQ11 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 11)
616627
# Add POWER10 to DYNAMIC_CORE for gcc 11 and newer, or for gcc 10 when its minor
# version is at least 2. The second branch is only reached when the major version
# is below 11, so testing GCCVERSIONGTEQ10 there selects gcc 10.x. The previously
# tested GCCVERSIONEQ10 is not assigned in the visible version checks, which left
# this branch dead. The final $(info ...) needs whitespace (not a comma) after the
# function name, otherwise make prints nothing.
ifeq ($(GCCVERSIONGTEQ11), 1)
617628
DYNAMIC_CORE += POWER10
629+
else ifeq ($(GCCVERSIONGTEQ10), 1)
630+
ifeq ($(GCCMINORVERSIONGTEQ2), 1)
631+
DYNAMIC_CORE += POWER10
632+
endif
618633
else
619634
$(info OpenBLAS: Your gcc version is too old to build the POWER10 kernels.)
620635
endif

Makefile.x86_64

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,24 @@ ifeq ($(CORE), HASWELL)
3131
ifndef DYNAMIC_ARCH
3232
ifndef NO_AVX2
3333
ifeq ($(C_COMPILER), GCC)
34+
# AVX2 support was added in 4.7.0, so pass -mavx2 for every major version from 5
# on, and for 4.x only when the minor version is at least 7. Requiring
# "major >= 4 and minor >= 7" for all releases wrongly rejected compilers such as
# 5.4 or 6.3, whose minor number is below 7.
35+
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
36+
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
# Separate name so that no version variable set elsewhere is overwritten.
AVX2_CC_MAJORGTEQ5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 5)
37+
ifeq ($(AVX2_CC_MAJORGTEQ5), 1)
CCOMMON_OPT += -mavx2
else ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11)
3438
CCOMMON_OPT += -mavx2
3539
endif
40+
endif
3641
ifeq ($(F_COMPILER), GFORTRAN)
42+
# AVX2 support was added in 4.7.0, so pass -mavx2 for every major version from 5
# on, and for 4.x only when the minor version is at least 7. Requiring
# "major >= 4 and minor >= 7" for all releases wrongly rejected compilers such as
# 5.4 or 6.3, whose minor number is below 7.
43+
GCCVERSIONGTEQ4 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 4)
44+
GCCMINORVERSIONGTEQ7 := $(shell expr `$(FC) -dumpversion | cut -f2 -d.` \>= 7)
# Separate name so that no version variable set elsewhere is overwritten.
AVX2_FC_MAJORGTEQ5 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 5)
45+
ifeq ($(AVX2_FC_MAJORGTEQ5), 1)
FCOMMON_OPT += -mavx2
else ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11)
3746
FCOMMON_OPT += -mavx2
3847
endif
3948
endif
4049
endif
4150
endif
51+
endif
4252

4353

4454

c_check

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
# Checking cross compile
77
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
88
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
9+
# On AIX take the architecture from `uname -p`. Chop the trailing newline here as
# well: the chop on the `uname -m` line runs before this reassignment, so without
# it the value would still end in "\n" and the string comparisons below could not match.
chop($hostarch = `uname -p`) if ($hostos eq "AIX");
910
$hostarch = "x86_64" if ($hostarch eq "amd64");
1011
$hostarch = "arm" if ($hostarch =~ /^arm.*/);
1112
$hostarch = "arm64" if ($hostarch eq "aarch64");

common_thread.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -132,18 +132,18 @@ extern int blas_server_avail;
132132
static __inline int num_cpu_avail(int level) {
133133

134134
#ifdef USE_OPENMP
135-
int openmp_nthreads=0;
135+
int openmp_nthreads=omp_get_max_threads();
136136
#endif
137137

138+
#ifndef USE_OPENMP
138139
if (blas_cpu_number == 1
139-
140+
#endif
140141
#ifdef USE_OPENMP
141-
|| omp_in_parallel()
142+
if (openmp_nthreads == 1 || omp_in_parallel()
142143
#endif
143-
) return 1;
144+
) return 1;
144145

145146
#ifdef USE_OPENMP
146-
openmp_nthreads=omp_get_max_threads();
147147
if (blas_cpu_number != openmp_nthreads) {
148148
goto_set_num_threads(openmp_nthreads);
149149
}

cpuid_power.c

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
#define CPUTYPE_PPCG4 7
5858
#define CPUTYPE_POWER8 8
5959
#define CPUTYPE_POWER9 9
60-
#define CPUTYPE_POWER10 10
60+
#define CPUTYPE_POWER10 10
6161

6262
char *cpuname[] = {
6363
"UNKNOWN",
@@ -83,8 +83,8 @@ char *lowercpuname[] = {
8383
"cell",
8484
"ppcg4",
8585
"power8",
86-
"power9",
87-
"power10"
86+
"power9",
87+
"power10"
8888
};
8989

9090
char *corename[] = {
@@ -97,8 +97,8 @@ char *corename[] = {
9797
"CELL",
9898
"PPCG4",
9999
"POWER8",
100-
"POWER9",
101-
"POWER10"
100+
"POWER9",
101+
"POWER10"
102102
};
103103

104104
int detect(void){
@@ -154,17 +154,17 @@ int detect(void){
154154

155155
pclose(infile);
156156

157-
if (!strncasecmp(p, "POWER3", 6)) return CPUTYPE_POWER3;
158-
if (!strncasecmp(p, "POWER4", 6)) return CPUTYPE_POWER4;
159-
if (!strncasecmp(p, "PPC970", 6)) return CPUTYPE_PPC970;
160-
if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5;
161-
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
162-
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
163-
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8;
164-
if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER9;
165-
if (!strncasecmp(p, "POWER10", 7)) return CPUTYPE_POWER10;
166-
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
167-
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
157+
if (strstr(p, "POWER3")) return CPUTYPE_POWER3;
158+
if (strstr(p, "POWER4")) return CPUTYPE_POWER4;
159+
if (strstr(p, "PPC970")) return CPUTYPE_PPC970;
160+
if (strstr(p, "POWER5")) return CPUTYPE_POWER5;
161+
if (strstr(p, "POWER6")) return CPUTYPE_POWER6;
162+
if (strstr(p, "POWER7")) return CPUTYPE_POWER6;
163+
if (strstr(p, "POWER8")) return CPUTYPE_POWER8;
164+
if (strstr(p, "POWER9")) return CPUTYPE_POWER9;
165+
if (strstr(p, "POWER10")) return CPUTYPE_POWER10;
166+
if (strstr(p, "Cell")) return CPUTYPE_CELL;
167+
if (strstr(p, "7447")) return CPUTYPE_PPCG4;
168168
return CPUTYPE_POWER5;
169169
#endif
170170

driver/others/dynamic_power.c

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,11 @@ extern gotoblas_t gotoblas_POWER8;
66
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
77
extern gotoblas_t gotoblas_POWER9;
88
#endif
9-
#if (!defined __GNUC__) || ( __GNUC__ >= 11)
9+
#if (!defined __GNUC__) || ( __GNUC__ >= 11) \
10+
|| (__GNUC__ == 10 && __GNUC_MINOR__ >= 2)
11+
#define HAVE_P10_SUPPORT 1
12+
#endif
13+
#ifdef HAVE_P10_SUPPORT
1014
extern gotoblas_t gotoblas_POWER10;
1115
#endif
1216

@@ -28,7 +32,7 @@ char *gotoblas_corename(void) {
2832
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
2933
if (gotoblas == &gotoblas_POWER9) return corename[3];
3034
#endif
31-
#if (!defined __GNUC__) || ( __GNUC__ >= 11)
35+
#ifdef HAVE_P10_SUPPORT
3236
if (gotoblas == &gotoblas_POWER10) return corename[4];
3337
#endif
3438
return corename[0];
@@ -44,8 +48,8 @@ static gotoblas_t *get_coretype(void) {
4448
if (__builtin_cpu_is("power9"))
4549
return &gotoblas_POWER9;
4650
#endif
47-
#if (!defined __GNUC__) || ( __GNUC__ >= 11)
48-
if (__builtin_cpu_is("isa_3_1") && __builtin_cpu_supports ("mma"))
51+
#ifdef HAVE_P10_SUPPORT
52+
if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma"))
4953
return &gotoblas_POWER10;
5054
#endif
5155
return NULL;
@@ -73,7 +77,7 @@ static gotoblas_t *force_coretype(char * coretype) {
7377
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
7478
case 3: return (&gotoblas_POWER9);
7579
#endif
76-
#if (!defined __GNUC__) || ( __GNUC__ >= 11)
80+
#ifdef HAVE_P10_SUPPORT
7781
case 4: return (&gotoblas_POWER10);
7882
#endif
7983
default: return NULL;

getarch.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
9090
#include <sys/sysinfo.h>
9191
#include <unistd.h>
9292
#endif
93+
#if defined(AIX)
94+
/* <unistd.h> declares sysconf() and _SC_NPROCESSORS_ONLN, which the AIX branch of
   get_num_cores() uses; it is otherwise only included under the preceding guard. */
#include <unistd.h>
#include <sys/sysinfo.h>
95+
#endif
9396

97+
#if defined(__x86_64__) || defined(_M_X64)
9498
#if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX2__)) || (defined(__clang__) && __clang_major__ >= 6))
9599
#else
96100
#define NO_AVX512
97101
#endif
102+
#endif
98103
/* #define FORCE_P2 */
99104
/* #define FORCE_KATMAI */
100105
/* #define FORCE_COPPERMINE */
@@ -1297,6 +1302,11 @@ static int get_num_cores(void) {
12971302
sysctl(m, 2, &count, &len, NULL, 0);
12981303

12991304
return count;
1305+
1306+
#elif defined(AIX)
1307+
//returns the number of processors which are currently online
1308+
return sysconf(_SC_NPROCESSORS_ONLN);
1309+
13001310
#else
13011311
return 2;
13021312
#endif

0 commit comments

Comments
 (0)