Skip to content

Commit 3a8f0a6

Browse files
authored
Merge pull request #1656 from xianyi/develop
Update the 0.3 branch from develop
2 parents 939452e + 3d3c197 commit 3a8f0a6

File tree

162 files changed

+13351
-704
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

162 files changed

+13351
-704
lines changed

CMakeLists.txt

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,15 @@ cmake_minimum_required(VERSION 2.8.5)
66
project(OpenBLAS C ASM)
77
set(OpenBLAS_MAJOR_VERSION 0)
88
set(OpenBLAS_MINOR_VERSION 3)
9-
set(OpenBLAS_PATCH_VERSION 0.dev)
9+
set(OpenBLAS_PATCH_VERSION 1.dev)
1010
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
1111

1212
# Adhere to GNU filesystem layout conventions
1313
include(GNUInstallDirs)
1414

15+
include(CMakePackageConfigHelpers)
16+
17+
1518
set(OpenBLAS_LIBNAME openblas)
1619

1720
#######
@@ -20,6 +23,7 @@ option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON
2023
endif()
2124
option(BUILD_WITHOUT_CBLAS "Without CBLAS" OFF)
2225
option(DYNAMIC_ARCH "Build with DYNAMIC_ARCH" OFF)
26+
option(DYNAMIC_OLDER "Support older cpus with DYNAMIC_ARCH" OFF)
2327
option(BUILD_RELAPACK "Build with ReLAPACK (recursive LAPACK" OFF)
2428
#######
2529
if(BUILD_WITHOUT_LAPACK)
@@ -208,6 +212,7 @@ set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
208212

209213
# Install libraries
210214
install(TARGETS ${OpenBLAS_LIBNAME}
215+
EXPORT "OpenBLASTargets"
211216
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
212217
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
213218
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} )
@@ -267,3 +272,21 @@ if(PKG_CONFIG_FOUND)
267272
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas.pc @ONLY)
268273
install (FILES ${PROJECT_BINARY_DIR}/openblas.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)
269274
endif()
275+
276+
277+
# GNUInstallDirs "DATADIR" wrong here; CMake search path wants "share".
278+
set(PN OpenBLAS)
279+
set(CMAKECONFIG_INSTALL_DIR "share/cmake/${PN}")
280+
configure_package_config_file(cmake/${PN}Config.cmake.in
281+
"${CMAKE_CURRENT_BINARY_DIR}/${PN}Config.cmake"
282+
INSTALL_DESTINATION ${CMAKECONFIG_INSTALL_DIR})
283+
write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/${PN}ConfigVersion.cmake
284+
VERSION ${${PN}_VERSION}
285+
COMPATIBILITY AnyNewerVersion)
286+
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PN}Config.cmake
287+
${CMAKE_CURRENT_BINARY_DIR}/${PN}ConfigVersion.cmake
288+
DESTINATION ${CMAKECONFIG_INSTALL_DIR})
289+
install(EXPORT "${PN}Targets"
290+
NAMESPACE "${PN}::"
291+
DESTINATION ${CMAKECONFIG_INSTALL_DIR})
292+

Makefile

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,9 @@ ifeq ($(DYNAMIC_ARCH), 1)
153153
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
154154
done
155155
@echo DYNAMIC_ARCH=1 >> Makefile.conf_last
156+
ifeq ($(DYNAMIC_OLDER), 1)
157+
@echo DYNAMIC_OLDER=1 >> Makefile.conf_last
158+
endif
156159
endif
157160
ifdef USE_THREAD
158161
@echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
@@ -294,9 +297,10 @@ endif
294297

295298
lapack-test :
296299
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
297-
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
300+
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/EIG xeigtstc xeigtstd xeigtsts xeigtstz
301+
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/LIN xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
298302
ifneq ($(CROSS), 1)
299-
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
303+
( cd $(NETLIB_LAPACK_DIR)/INSTALL; make all; ./testlsame; ./testslamch; ./testdlamch; \
300304
./testsecond; ./testdsecnd; ./testieee; ./testversion )
301305
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
302306
endif
@@ -308,9 +312,9 @@ lapack-runtest:
308312

309313

310314
blas-test:
311-
(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out)
315+
(cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && rm -f x* *.out)
312316
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing
313-
(cd $(NETLIB_LAPACK_DIR)/BLAS && cat *.out)
317+
(cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && cat *.out)
314318

315319

316320
dummy :

Makefile.install

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ endif
9898
@echo Generating openblas.pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
9999
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
100100
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
101-
@echo 'openblas_config= USE_64BITINT='$(USE_64BITINT) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
101+
@echo 'openblas_config= USE_64BITINT='$(USE_64BITINT) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
102102
@echo 'version='$(VERSION) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
103103
@echo 'extralib='$(EXTRALIB) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
104104
@cat openblas.pc.in >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"

Makefile.rule

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44

55
# This library's version
6-
VERSION = 0.3.0.dev
6+
VERSION = 0.3.1.dev
77

88
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
99
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
@@ -17,6 +17,11 @@ VERSION = 0.3.0.dev
1717
# If you want to support multiple architecture in one binary
1818
# DYNAMIC_ARCH = 1
1919

20+
# If you want the full list of x86_64 architectures supported in DYNAMIC_ARCH
21+
# mode (including individual optimized codes for PENRYN, DUNNINGTON, OPTERON,
22+
# OPTERON_SSE3, ATOM and NANO rather than fallbacks to older architectures)
23+
# DYNAMIC_OLDER = 1
24+
2025
# C compiler including binary type(32bit / 64bit). Default is gcc.
2126
# Don't use Intel Compiler or PGI, it won't generate right codes as I expect.
2227
# CC = gcc
@@ -55,6 +60,14 @@ VERSION = 0.3.0.dev
5560
# This flag is always set for POWER8. Don't modify the flag
5661
# USE_OPENMP = 1
5762

63+
# The OpenMP scheduler to use - by default this is "static" and you
64+
# will normally not want to change this unless you know that your main
65+
# workload will involve tasks that have highly unbalanced running times
66+
# for individual threads. Changing away from "static" may also adversely
67+
# affect memory access locality in NUMA systems. Setting to "runtime" will
68+
# allow you to select the scheduler from the environment variable OMP_SCHEDULE
69+
# CCOMMON_OPT += -DOMP_SCHED=dynamic
70+
5871
# You can define maximum number of threads. Basically it should be
5972
# less than actual number of cores. If you don't specify one, it's
6073
# automatically detected by the script.
@@ -151,8 +164,11 @@ NO_AFFINITY = 1
151164
# CONSISTENT_FPCSR = 1
152165

153166
# If any gemm argument m, n or k is less than or equal to this threshold, gemm will be executed
154-
# with single thread. You can use this flag to avoid the overhead of multi-threading
155-
# in small matrix sizes. The default value is 4.
167+
# with single thread. (Actually in recent versions this is a factor proportional to the
168+
# number of floating point operations necessary for the given problem size, no longer
169+
# an individual dimension). You can use this setting to avoid the overhead of multi-
170+
# threading in small matrix sizes. The default value is 4, but values as high as 50 have
171+
# been reported to be optimal for certain workloads (50 is the recommended value for Julia).
156172
# GEMM_MULTITHREAD_THRESHOLD = 4
157173

158174
# If you need a sanity check by comparing with reference BLAS. It'll be very

Makefile.system

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@ ifeq ($(BINARY), 32)
6262
ifeq ($(TARGET), HASWELL)
6363
GETARCH_FLAGS := -DFORCE_NEHALEM
6464
endif
65+
ifeq ($(TARGET), SKYLAKEX)
66+
GETARCH_FLAGS := -DFORCE_NEHALEM
67+
endif
6568
ifeq ($(TARGET), SANDYBRIDGE)
6669
GETARCH_FLAGS := -DFORCE_NEHALEM
6770
endif
@@ -95,6 +98,9 @@ ifeq ($(BINARY), 32)
9598
ifeq ($(TARGET_CORE), HASWELL)
9699
GETARCH_FLAGS := -DFORCE_NEHALEM
97100
endif
101+
ifeq ($(TARGET_CORE), SKYLAKEX)
102+
GETARCH_FLAGS := -DFORCE_NEHALEM
103+
endif
98104
ifeq ($(TARGET_CORE), SANDYBRIDGE)
99105
GETARCH_FLAGS := -DFORCE_NEHALEM
100106
endif
@@ -141,6 +147,10 @@ ifeq ($(NO_AVX2), 1)
141147
GETARCH_FLAGS += -DNO_AVX2
142148
endif
143149

150+
ifeq ($(NO_AVX512), 1)
151+
GETARCH_FLAGS += -DNO_AVX512
152+
endif
153+
144154
ifeq ($(DEBUG), 1)
145155
GETARCH_FLAGS += -g
146156
endif
@@ -238,7 +248,7 @@ endif
238248

239249
ifeq ($(OSNAME), Darwin)
240250
ifndef MACOSX_DEPLOYMENT_TARGET
241-
export MACOSX_DEPLOYMENT_TARGET=10.6
251+
export MACOSX_DEPLOYMENT_TARGET=10.8
242252
endif
243253
MD5SUM = md5 -r
244254
endif
@@ -462,13 +472,37 @@ DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
462472
endif
463473

464474
ifeq ($(ARCH), x86_64)
465-
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
475+
DYNAMIC_CORE = PRESCOTT CORE2
476+
ifeq ($(DYNAMIC_OLDER), 1)
477+
DYNAMIC_CORE += PENRYN DUNNINGTON
478+
endif
479+
DYNAMIC_CORE += NEHALEM
480+
ifeq ($(DYNAMIC_OLDER), 1)
481+
DYNAMIC_CORE += OPTERON OPTERON_SSE3
482+
endif
483+
DYNAMIC_CORE += BARCELONA
484+
ifeq ($(DYNAMIC_OLDER), 1)
485+
DYNAMIC_CORE += BOBCAT ATOM NANO
486+
endif
466487
ifneq ($(NO_AVX), 1)
467488
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR
468489
endif
469490
ifneq ($(NO_AVX2), 1)
470491
DYNAMIC_CORE += HASWELL ZEN
471492
endif
493+
ifneq ($(NO_AVX512), 1)
494+
ifneq ($(NO_AVX2), 1)
495+
DYNAMIC_CORE += SKYLAKEX
496+
endif
497+
endif
498+
endif
499+
500+
ifdef DYNAMIC_LIST
501+
override DYNAMIC_CORE = PRESCOTT $(DYNAMIC_LIST)
502+
XCCOMMON_OPT = -DDYNAMIC_LIST -DDYN_PRESCOTT
503+
XCCOMMON_OPT += $(foreach dcore,$(DYNAMIC_LIST),-DDYN_$(dcore))
504+
CCOMMON_OPT += $(XCCOMMON_OPT)
505+
#CCOMMON_OPT += -DDYNAMIC_LIST='$(DYNAMIC_LIST)'
472506
endif
473507

474508
# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
@@ -902,6 +936,10 @@ ifeq ($(DYNAMIC_ARCH), 1)
902936
CCOMMON_OPT += -DDYNAMIC_ARCH
903937
endif
904938

939+
ifeq ($(DYNAMIC_OLDER), 1)
940+
CCOMMON_OPT += -DDYNAMIC_OLDER
941+
endif
942+
905943
ifeq ($(NO_LAPACK), 1)
906944
CCOMMON_OPT += -DNO_LAPACK
907945
#Disable LAPACK C interface
@@ -924,6 +962,10 @@ ifeq ($(NO_AVX2), 1)
924962
CCOMMON_OPT += -DNO_AVX2
925963
endif
926964

965+
ifeq ($(NO_AVX512), 1)
966+
CCOMMON_OPT += -DNO_AVX512
967+
endif
968+
927969
ifdef SMP
928970
CCOMMON_OPT += -DSMP_SERVER
929971

@@ -1230,6 +1272,7 @@ export MSA_FLAGS
12301272
export KERNELDIR
12311273
export FUNCTION_PROFILE
12321274
export TARGET_CORE
1275+
export NO_AVX512
12331276

12341277
export SGEMM_UNROLL_M
12351278
export SGEMM_UNROLL_N

Makefile.x86_64

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,13 @@ endif
88
endif
99
endif
1010

11+
ifeq ($(CORE), SKYLAKEX)
12+
ifndef NO_AVX512
13+
CCOMMON_OPT += -march=skylake-avx512
14+
FCOMMON_OPT += -march=skylake-avx512
15+
endif
16+
endif
17+
1118
ifeq ($(OSNAME), Interix)
1219
ARFLAGS = -m x64
1320
endif

TargetList.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ DUNNINGTON
2020
NEHALEM
2121
SANDYBRIDGE
2222
HASWELL
23+
SKYLAKEX
2324
ATOM
2425

2526
b)AMD CPU:

c_check

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,21 @@ $architecture = zarch if ($data =~ /ARCH_ZARCH/);
201201
$binformat = bin32;
202202
$binformat = bin64 if ($data =~ /BINARY_64/);
203203

204+
$no_avx512= 0;
205+
if (($architecture eq "x86") || ($architecture eq "x86_64")) {
206+
$code = '"vbroadcastss -4 * 4(%rsi), %zmm2"';
207+
print $tmpf "int main(void){ __asm__ volatile($code); }\n";
208+
$args = " -march=skylake-avx512 -o $tmpf.o -x c $tmpf";
209+
my @cmd = ("$compiler_name $args >/dev/null 2>/dev/null");
210+
system(@cmd) == 0;
211+
if ($? != 0) {
212+
$no_avx512 = 1;
213+
} else {
214+
$no_avx512 = 0;
215+
}
216+
unlink("tmpf.o");
217+
}
218+
204219
$data = `$compiler_name -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`;
205220

206221
$data =~ /globl\s([_\.]*)(.*)/;
@@ -288,6 +303,7 @@ print MAKEFILE "CROSS=1\n" if $cross != 0;
288303
print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n";
289304
print MAKEFILE "HAVE_MSA=1\n" if $have_msa eq 1;
290305
print MAKEFILE "MSA_FLAGS=$msa_flags\n" if $have_msa eq 1;
306+
print MAKEFILE "NO_AVX512=1\n" if $no_avx512 eq 1;
291307

292308
$os =~ tr/[a-z]/[A-Z]/;
293309
$architecture =~ tr/[a-z]/[A-Z]/;

cblas.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,11 @@ CBLAS_INDEX cblas_idamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPE
8282
CBLAS_INDEX cblas_icamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
8383
CBLAS_INDEX cblas_izamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
8484

85+
CBLAS_INDEX cblas_isamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
86+
CBLAS_INDEX cblas_idamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
87+
CBLAS_INDEX cblas_icamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
88+
CBLAS_INDEX cblas_izamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
89+
8590
void cblas_saxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
8691
void cblas_daxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
8792
void cblas_caxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);

0 commit comments

Comments
 (0)