Skip to content

Commit 9eb897c

Browse files
authored
Merge pull request #75 from xianyi/develop
rebase
2 parents 525db54 + 7cead56 commit 9eb897c

File tree

10 files changed

+1746
-66
lines changed

10 files changed

+1746
-66
lines changed

CMakeLists.txt

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
66
project(OpenBLAS C ASM)
77
set(OpenBLAS_MAJOR_VERSION 0)
88
set(OpenBLAS_MINOR_VERSION 3)
9-
set(OpenBLAS_PATCH_VERSION 9.dev)
9+
set(OpenBLAS_PATCH_VERSION 10.dev)
1010
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
1111

1212
# Adhere to GNU filesystem layout conventions
@@ -249,7 +249,7 @@ if (BUILD_SHARED_LIBS AND BUILD_RELAPACK)
249249
endif()
250250
endif()
251251

252-
if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFIX} STREQUAL "")
252+
if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFFIX} STREQUAL "")
253253
if (NOT DEFINED ARCH)
254254
set(ARCH_IN "x86_64")
255255
else()
@@ -358,10 +358,21 @@ endif()
358358

359359
if(NOT NO_CBLAS)
360360
message (STATUS "Generating cblas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
361-
362361
set(CBLAS_H ${CMAKE_BINARY_DIR}/generated/cblas.h)
363362
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h CBLAS_H_CONTENTS)
364363
string(REPLACE "common" "openblas_config" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
364+
if (NOT ${SYMBOLPREFIX} STREQUAL "")
365+
string(REPLACE " cblas" " ${SYMBOLPREFIX}cblas" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
366+
string(REPLACE " openblas" " ${SYMBOLPREFIX}openblas" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
367+
string (REPLACE " ${SYMBOLPREFIX}openblas_complex" " openblas_complex" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
368+
string(REPLACE " goto" " ${SYMBOLPREFIX}goto" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
369+
endif()
370+
if (NOT ${SYMBOLSUFFIX} STREQUAL "")
371+
string(REGEX REPLACE "(cblas[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
372+
string(REGEX REPLACE "(openblas[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
373+
string(REGEX REPLACE "(openblas_complex[^ ]*)${SYMBOLSUFFIX}" "\\1" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
374+
string(REGEX REPLACE "(goto[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
375+
endif()
365376
file(WRITE ${CBLAS_H} "${CBLAS_H_CONTENTS_NEW}")
366377
install (FILES ${CBLAS_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
367378
endif()

Makefile

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -365,11 +365,12 @@ clean ::
365365
@$(MAKE) -C kernel clean
366366
#endif
367367
@$(MAKE) -C reference clean
368-
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h
368+
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h *.so.renamed *.a.renamed *.so.0
369369
ifeq ($(OSNAME), Darwin)
370370
@rm -rf getarch.dSYM getarch_2nd.dSYM
371371
endif
372372
@rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib
373+
@rm -f cblas.tmp cblas.tmp2
373374
@touch $(NETLIB_LAPACK_DIR)/make.inc
374375
@$(MAKE) -C $(NETLIB_LAPACK_DIR) clean
375376
@rm -f $(NETLIB_LAPACK_DIR)/make.inc $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling.h

Makefile.install

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,22 @@ install : lib.grd
4545

4646
ifndef NO_CBLAS
4747
@echo Generating cblas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
48-
@sed 's/common/openblas_config/g' cblas.h > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h"
48+
@cp cblas.h cblas.tmp
49+
ifdef SYMBOLPREFIX
50+
@sed 's/cblas[^( ]*/$(SYMBOLPREFIX)&/g' cblas.tmp > cblas.tmp2
51+
@sed 's/openblas[^( ]*/$(SYMBOLPREFIX)&/g' cblas.tmp2 > cblas.tmp
52+
#change back any openblas_complex_float and double that got hit
53+
@sed 's/$(SYMBOLPREFIX)openblas_complex_/openblas_complex_/g' cblas.tmp > cblas.tmp2
54+
@sed 's/goto[^( ]*/$(SYMBOLPREFIX)&/g' cblas.tmp2 > cblas.tmp
55+
endif
56+
ifdef SYMBOLSUFFIX
57+
@sed 's/cblas[^( ]*/&$(SYMBOLSUFFIX)/g' cblas.tmp > cblas.tmp2
58+
@sed 's/openblas[^( ]*/&$(SYMBOLSUFFIX)/g' cblas.tmp2 > cblas.tmp
59+
#change back any openblas_complex_float and double that got hit
60+
@sed 's/\(openblas_complex_\)\([^ ]*\)$(SYMBOLSUFFIX)/\1\2 /g' cblas.tmp > cblas.tmp2
61+
@sed 's/goto[^( ]*/&$(SYMBOLSUFFIX)/g' cblas.tmp2 > cblas.tmp
62+
endif
63+
@sed 's/common/openblas_config/g' cblas.tmp > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h"
4964
endif
5065

5166
ifneq ($(OSNAME), AIX)
@@ -168,4 +183,3 @@ endif
168183
@echo " endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
169184
@echo "endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
170185
@echo Install OK!
171-

Makefile.power

Lines changed: 24 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -10,54 +10,36 @@ USE_OPENMP = 1
1010
endif
1111

1212
ifeq ($(CORE), POWER10)
13-
ifeq ($(USE_OPENMP), 1)
14-
COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -DUSE_OPENMP -fno-fast-math -fopenmp
15-
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -DUSE_OPENMP -fno-fast-math -fopenmp
16-
else
1713
COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
1814
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
1915
endif
20-
endif
2116

2217
ifeq ($(CORE), POWER9)
23-
ifeq ($(USE_OPENMP), 1)
2418
ifneq ($(C_COMPILER), PGI)
25-
CCOMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -DUSE_OPENMP -fno-fast-math -fopenmp
26-
else
27-
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align -DUSE_OPENMP -mp
28-
endif
29-
ifneq ($(F_COMPILER), PGI)
30-
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -DUSE_OPENMP -fno-fast-math -fopenmp
19+
CCOMMON_OPT += -Ofast -mvsx -fno-fast-math
20+
ifneq ($(GCCVERSIONGT4), 1)
21+
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
22+
CCOMMON_OPT += -mcpu=power8 -mtune=power8
3123
else
32-
FCOMMON_OPT += -O2 -Mrecursive -DUSE_OPENMP -mp
24+
CCOMMON_OPT += -mcpu=power9 -mtune=power9
3325
endif
3426
else
35-
ifneq ($(C_COMPILER), PGI)
36-
CCOMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -fno-fast-math
37-
else
3827
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
3928
endif
4029
ifneq ($(F_COMPILER), PGI)
41-
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -fno-fast-math
30+
FCOMMON_OPT += -O2 -frecursive -fno-fast-math
31+
ifneq ($(GCCVERSIONGT4), 1)
32+
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
33+
FCOMMON_OPT += -mcpu=power8 -mtune=power8
4234
else
43-
FCOMMON_OPT += -O2 -Mrecursive
35+
FCOMMON_OPT += -mcpu=power9 -mtune=power9
4436
endif
37+
else
38+
FCOMMON_OPT += -O2 -Mrecursive
4539
endif
4640
endif
4741

4842
ifeq ($(CORE), POWER8)
49-
ifeq ($(USE_OPENMP), 1)
50-
ifneq ($(C_COMPILER), PGI)
51-
CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -DUSE_OPENMP -fno-fast-math -fopenmp
52-
else
53-
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align -DUSE_OPENMP -mp
54-
endif
55-
ifneq ($(F_COMPILER), PGI)
56-
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -DUSE_OPENMP -fno-fast-math -fopenmp
57-
else
58-
FCOMMON_OPT += -O2 -Mrecursive -DUSE_OPENMP -mp
59-
endif
60-
else
6143
ifneq ($(C_COMPILER), PGI)
6244
CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
6345
else
@@ -73,6 +55,18 @@ else
7355
FCOMMON_OPT += -O2 -Mrecursive
7456
endif
7557
endif
58+
59+
ifeq ($(USE_OPENMP), 1)
60+
ifneq ($(C_COMPILER), PGI)
61+
CCOMMON_OPT += -DUSE_OPENMP -fopenmp
62+
else
63+
CCOMMON_OPT += -DUSE_OPENMP -mp
64+
endif
65+
ifneq ($(F_COMPILER), PGI)
66+
FCOMMON_OPT += -DUSE_OPENMP -fopenmp
67+
else
68+
FCOMMON_OPT += -DUSE_OPENMP -mp
69+
endif
7670
endif
7771

7872
# workaround for C->FORTRAN ABI violation in LAPACKE

cpuid_power.c

Lines changed: 13 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838

3939
#include <sys/utsname.h>
4040
#ifdef _AIX
41+
#include <sys/systemcfg.h>
4142
#include <sys/vminfo.h>
4243
#endif
4344
#ifdef __APPLE__
@@ -137,35 +138,19 @@ int detect(void){
137138
#endif
138139

139140
#ifdef _AIX
140-
FILE *infile;
141-
char buffer[512], *p;
142-
143-
p = (char *)NULL;
144-
infile = popen("prtconf|grep 'Processor Type'", "r");
145-
while (fgets(buffer, sizeof(buffer), infile)){
146-
if (!strncmp("Pro", buffer, 3)){
147-
p = strchr(buffer, ':') + 2;
148-
#if 0
149-
fprintf(stderr, "%s\n", p);
150-
#endif
151-
break;
152-
}
153-
}
154-
155-
pclose(infile);
141+
// Cast from int to unsigned to ensure comparisons work for all bits in
142+
// the bit mask, even the top bit
143+
unsigned implementation = (unsigned) _system_configuration.implementation;
156144

157-
if (strstr(p, "POWER3")) return CPUTYPE_POWER3;
158-
if (strstr(p, "POWER4")) return CPUTYPE_POWER4;
159-
if (strstr(p, "PPC970")) return CPUTYPE_PPC970;
160-
if (strstr(p, "POWER5")) return CPUTYPE_POWER5;
161-
if (strstr(p, "POWER6")) return CPUTYPE_POWER6;
162-
if (strstr(p, "POWER7")) return CPUTYPE_POWER6;
163-
if (strstr(p, "POWER8")) return CPUTYPE_POWER8;
164-
if (strstr(p, "POWER9")) return CPUTYPE_POWER9;
165-
if (strstr(p, "POWER10")) return CPUTYPE_POWER10;
166-
if (strstr(p, "Cell")) return CPUTYPE_CELL;
167-
if (strstr(p, "7447")) return CPUTYPE_PPCG4;
168-
return CPUTYPE_POWER5;
145+
if (implementation >= 0x40000u) return CPUTYPE_POWER10;
146+
else if (implementation & 0x20000) return CPUTYPE_POWER9;
147+
else if (implementation & 0x10000) return CPUTYPE_POWER8;
148+
else if (implementation & 0x08000) return CPUTYPE_POWER7; // POWER 7
149+
else if (implementation & 0x04000) return CPUTYPE_POWER6;
150+
else if (implementation & 0x02000) return CPUTYPE_POWER5;
151+
else if (implementation & 0x01000) return CPUTYPE_POWER4; // MPC7450
152+
else if (implementation & 0x00800) return CPUTYPE_POWER4;
153+
else return CPUTYPE_POWER3;
169154
#endif
170155

171156
#ifdef __APPLE__

driver/others/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,10 @@ endif
4747
endif
4848

4949
ifdef USE_CUDA
50+
ifeq ($(USE_CUDA), 1)
5051
COMMONOBJS += cuda_init.$(SUFFIX)
5152
endif
53+
endif
5254

5355
ifdef FUNCTION_PROFILE
5456
COMMONOBJS += profile.$(SUFFIX)

kernel/power/KERNEL.POWER10

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,12 +187,12 @@ ZSWAPKERNEL = zswap.c
187187
#
188188

189189
SGEMVNKERNEL = sgemv_n.c
190-
DGEMVNKERNEL = dgemv_n.c
190+
DGEMVNKERNEL = dgemv_n_power10.c
191191
CGEMVNKERNEL = cgemv_n.c
192192
ZGEMVNKERNEL = zgemv_n_4.c
193193
#
194194
SGEMVTKERNEL = sgemv_t.c
195-
DGEMVTKERNEL = dgemv_t.c
195+
DGEMVTKERNEL = dgemv_t_power10.c
196196
CGEMVTKERNEL = cgemv_t.c
197197
ZGEMVTKERNEL = zgemv_t_4.c
198198

0 commit comments

Comments
 (0)