Skip to content

Commit 51c2261

Browse files
authored
Merge pull request #2907 from xianyi/develop
Update from develop for 0.3.11
2 parents 63b03ef + b8f6892 commit 51c2261

File tree

503 files changed

+26175
-3622
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

503 files changed

+26175
-3622
lines changed

.github/workflows/dynamic_arch.yml

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ jobs:
99
fail-fast: false
1010
matrix:
1111
os: [ubuntu-latest, macos-latest]
12+
fortran: [gfortran, flang]
1213
build: [cmake, make]
1314
steps:
1415
- name: Checkout repository
@@ -24,7 +25,7 @@ jobs:
2425
# Restore any ccache cache entry, if none for
2526
# ${{ runner.os }}-ccache-${{ github.sha }} exists
2627
restore-keys: |
27-
${{ runner.os }}-ccache
28+
${{ runner.os }}-ccache-
2829
2930
- name: Print system information
3031
run: |
@@ -49,8 +50,8 @@ jobs:
4950
fi
5051
ccache -M 300M # Limit the ccache size; Github's overall cache limit is 5GB
5152
52-
- name: Build
53-
if: matrix.build == 'make'
53+
- name: gfortran build
54+
if: matrix.build == 'make' && matrix.fortran == 'gfortran'
5455
run: |
5556
if [ "$RUNNER_OS" == "Linux" ]; then
5657
export PATH="/usr/lib/ccache:${PATH}"
@@ -63,8 +64,29 @@ jobs:
6364
6465
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0
6566
66-
- name: CMake build
67-
if: matrix.build == 'cmake'
67+
- name: flang build
68+
if: matrix.build == 'make' && matrix.fortran == 'flang'
69+
run: |
70+
if [ "$RUNNER_OS" == "Linux" ]; then
71+
export PATH="/usr/lib/ccache:${PATH}"
72+
elif [ "$RUNNER_OS" == "macOS" ]; then
73+
exit 0
74+
else
75+
echo "$RUNNER_OS not supported"
76+
exit 1
77+
fi
78+
79+
cd /usr/
80+
sudo wget -nv https://github.com/flang-compiler/flang/releases/download/flang_20190329/flang-20190329-x86-70.tgz
81+
sudo tar xf flang-20190329-x86-70.tgz
82+
sudo rm flang-20190329-x86-70.tgz
83+
cd -
84+
85+
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC=flang
86+
87+
88+
- name: CMake gfortran build
89+
if: matrix.build == 'cmake' && matrix.fortran == 'gfortran'
6890
run: |
6991
if [ "$RUNNER_OS" == "Linux" ]; then
7092
export PATH="/usr/lib/ccache:${PATH}"

.travis.yml

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,18 @@ matrix:
4343
- TARGET_BOX=IBMZ_LINUX
4444
- BTYPE="BINARY=64 USE_OPENMP=1"
4545

46+
- <<: *test-ubuntu
47+
os: linux
48+
dist: focal
49+
arch: s390x
50+
compiler: clang
51+
before_script:
52+
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=Z13 NUM_THREADS=32"
53+
env:
54+
# for matrix annotation only
55+
- TARGET_BOX=IBMZ_LINUX
56+
- BTYPE="BINARY=64 USE_OPENMP=0 CC=clang"
57+
4658
- <<: *test-ubuntu
4759
env:
4860
- TARGET_BOX=LINUX64
@@ -75,6 +87,40 @@ matrix:
7587
- TARGET_BOX=LINUX32
7688
- BTYPE="BINARY=32"
7789

90+
- os: linux
91+
arch: ppc64le
92+
dist: bionic
93+
compiler: gcc
94+
before_script:
95+
- sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' -y
96+
- sudo apt-get update
97+
- sudo apt-get install gcc-9 gfortran-9 -y
98+
script:
99+
- make QUIET_MAKE=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
100+
- make -C test $COMMON_FLAGS $BTYPE
101+
- make -C ctest $COMMON_FLAGS $BTYPE
102+
- make -C utest $COMMON_FLAGS $BTYPE
103+
env:
104+
# for matrix annotation only
105+
- TARGET_BOX=PPC64LE_LINUX_P9
106+
107+
- os: linux
108+
arch: ppc64le
109+
dist: bionic
110+
compiler: gcc
111+
before_script:
112+
- sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' -y
113+
- sudo apt-get update
114+
- sudo apt-get install gcc-9 gfortran-9 -y
115+
script:
116+
- make QUIET_MAKE=1 BUILD_BFLOAT16=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
117+
- make -C test $COMMON_FLAGS $BTYPE
118+
- make -C ctest $COMMON_FLAGS $BTYPE
119+
- make -C utest $COMMON_FLAGS $BTYPE
120+
env:
121+
# for matrix annotation only
122+
- TARGET_BOX=PPC64LE_LINUX_P9
123+
78124
- os: linux
79125
compiler: gcc
80126
addons:
@@ -175,6 +221,17 @@ matrix:
175221
env:
176222
- BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-8"
177223

224+
- <<: *test-macos
225+
osx_image: xcode12
226+
before_script:
227+
- COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
228+
- brew update
229+
- brew install gcc@10 # for gfortran
230+
script:
231+
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
232+
env:
233+
- BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-10"
234+
178235
- <<: *test-macos
179236
osx_image: xcode10.0
180237
env:
@@ -193,6 +250,21 @@ matrix:
193250
- CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
194251
- CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch armv7 -miphoneos-version-min=5.1"
195252
- BTYPE="TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1"
253+
254+
- &test-graviton2
255+
os: linux
256+
arch: arm64-graviton2
257+
dist: focal
258+
group: edge
259+
virt: lxd
260+
compiler: gcc
261+
addons:
262+
apt:
263+
packages:
264+
- gfortran
265+
script:
266+
- travis_wait 45 make && make lapack-test
267+
196268
# whitelist
197269
branches:
198270
only:

CMakeLists.txt

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
66
project(OpenBLAS C ASM)
77
set(OpenBLAS_MAJOR_VERSION 0)
88
set(OpenBLAS_MINOR_VERSION 3)
9-
set(OpenBLAS_PATCH_VERSION 10)
10-
9+
set(OpenBLAS_PATCH_VERSION 11)
1110
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
1211

1312
# Adhere to GNU filesystem layout conventions
@@ -30,6 +29,8 @@ option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding proc
3029
else()
3130
set(NO_AFFINITY 1)
3231
endif()
32+
option(CPP_THREAD_SAFETY_TEST "Run a massively parallel DGEMM test to confirm thread safety of the library (requires OpenMP and about 1.3GB of RAM)" OFF)
33+
option(CPP_THREAD_SAFETY_GEMV "Run a massively parallel DGEMV test to confirm thread safety of the library (requires OpenMP)" OFF)
3334

3435
# Add a prefix or suffix to all exported symbol names in the shared library.
3536
# Avoids conflicts with other BLAS libraries, especially when using
@@ -88,13 +89,13 @@ if (NOT NO_LAPACK)
8889
list(APPEND SUBDIRS lapack)
8990
endif ()
9091

91-
if (NOT DEFINED BUILD_HALF)
92-
set (BUILD_HALF false)
92+
if (NOT DEFINED BUILD_BFLOAT16)
93+
set (BUILD_BFLOAT16 false)
9394
endif ()
9495
# set which float types we want to build for
9596
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
9697
# if none are defined, build for all
97-
# set(BUILD_HALF true)
98+
# set(BUILD_BFLOAT16 true)
9899
set(BUILD_SINGLE true)
99100
set(BUILD_DOUBLE true)
100101
set(BUILD_COMPLEX true)
@@ -126,9 +127,9 @@ if (BUILD_COMPLEX16)
126127
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
127128
endif ()
128129

129-
if (BUILD_HALF)
130+
if (BUILD_BFLOAT16)
130131
message(STATUS "Building Half Precision")
131-
list(APPEND FLOAT_TYPES "HALF") # defines nothing
132+
list(APPEND FLOAT_TYPES "BFLOAT16") # defines nothing
132133
endif ()
133134

134135
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
@@ -234,6 +235,10 @@ if (NOT MSVC AND NOT NOFORTRAN)
234235
if(NOT NO_CBLAS)
235236
add_subdirectory(ctest)
236237
endif()
238+
add_subdirectory(lapack-netlib/TESTING)
239+
if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV)
240+
add_subdirectory(cpp_thread_test)
241+
endif()
237242
endif()
238243

239244
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
@@ -249,7 +254,7 @@ if (BUILD_SHARED_LIBS AND BUILD_RELAPACK)
249254
endif()
250255
endif()
251256

252-
if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFIX} STREQUAL "")
257+
if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFFIX} STREQUAL "")
253258
if (NOT DEFINED ARCH)
254259
set(ARCH_IN "x86_64")
255260
else()
@@ -358,10 +363,21 @@ endif()
358363

359364
if(NOT NO_CBLAS)
360365
message (STATUS "Generating cblas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
361-
362366
set(CBLAS_H ${CMAKE_BINARY_DIR}/generated/cblas.h)
363367
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h CBLAS_H_CONTENTS)
364368
string(REPLACE "common" "openblas_config" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
369+
if (NOT ${SYMBOLPREFIX} STREQUAL "")
370+
string(REPLACE " cblas" " ${SYMBOLPREFIX}cblas" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
371+
string(REPLACE " openblas" " ${SYMBOLPREFIX}openblas" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
372+
string (REPLACE " ${SYMBOLPREFIX}openblas_complex" " openblas_complex" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
373+
string(REPLACE " goto" " ${SYMBOLPREFIX}goto" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
374+
endif()
375+
if (NOT ${SYMBOLSUFFIX} STREQUAL "")
376+
string(REGEX REPLACE "(cblas[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
377+
string(REGEX REPLACE "(openblas[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
378+
string(REGEX REPLACE "(openblas_complex[^ ]*)${SYMBOLSUFFIX}" "\\1" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
379+
string(REGEX REPLACE "(goto[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
380+
endif()
365381
file(WRITE ${CBLAS_H} "${CBLAS_H_CONTENTS_NEW}")
366382
install (FILES ${CBLAS_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
367383
endif()
@@ -378,11 +394,9 @@ if(NOT NO_LAPACKE)
378394
install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/openblas${SUFFIX64})
379395
endif()
380396

381-
include(FindPkgConfig QUIET)
382-
if(PKG_CONFIG_FOUND)
383-
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc @ONLY)
384-
install (FILES ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)
385-
endif()
397+
# Install pkg-config files
398+
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc @ONLY)
399+
install (FILES ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)
386400

387401

388402
# GNUInstallDirs "DATADIR" wrong here; CMake search path wants "share".

CONTRIBUTORS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ In chronological order:
187187
* Marius Hillenbrand <https://github.com/mhillenibm>
188188
* [2020-05-12] Revise dynamic architecture detection for IBM z
189189
* [2020-05-12] Add new sgemm and strmm kernel for IBM z14
190+
* [2020-09-07] Fix builds with clang on IBM z, including dynamic architecture support
190191

191192
* Danfeng Zhang <https://github.com/craft-zhang>
192193
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53

Changelog.txt

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,76 @@
11
OpenBLAS ChangeLog
2+
====================================================================
3+
Version 0.3.11
4+
17-Oct-2020
5+
6+
common:
7+
* API change:
8+
the newly added BFLOAT16 functions were renamed to use the
9+
letter "B" instead of "H" to avoid potential confusion with
10+
the IEEE "half precision float" type, i.e. the 0.3.10
11+
SHGEMM is now SBGEMM and the corresponding build option
12+
was changed from "BUILD_HALF" to "BUILD_BFLOAT16".
13+
* Reduced the default BLAS3_MEM_ALLOC_THRESHOLD (used as an upper
14+
limit for placing temporary arrays on the stack) to be compatible
15+
with a stack size of 1MB (as imposed by the Java runtime library)
16+
* Added mixed-precision dot function SBDOT and utility functions
17+
sbstobf16, sbdtobf16, sbf16tos and dbf16tod to convert between
18+
single or double precision float arrays and bfloat16 arrays
19+
* Fixed prototypes of LAPACK_?ggsvp and LAPACK_?ggsvd functions
20+
in lapack.h
21+
* Fixed underflow and rounding errors in LAPACK SLANV2 and DLANV2
22+
(causing miscalculations in e.g. SHSEQR/DHSEQR, LAPACK issue #263)
23+
* Fixed workspace calculation in LAPACK ?GELQ (LAPACK issue #415)
24+
* Fixed several bugs in the LAPACK testsuite
25+
* Improved performance of TRMM and TRSM for certain problem sizes
26+
* Fixed infinite recursions and workspace miscalculations in ReLAPACK
27+
* CMAKE builds no longer require pkg-config for creating the .pc file
28+
* Makefile builds no longer misread NO_CBLAS=0 or NO_LAPACK=0 as
29+
enabling these options
30+
* Fixed detection of gfortran when invoked through an mpi wrapper
31+
* Improved thread reinitialization performance with OpenMP after a fork
32+
* Added support for building only the subset of the library required
33+
for a particular precision by specifying BUILD_SINGLE, BUILD_DOUBLE, BUILD_COMPLEX and/or BUILD_COMPLEX16
34+
* Optional function name prefixes and suffixes are now correctly
35+
reflected in the generated cblas.h
36+
* Added CMAKE build support for the LAPACK and multithreading tests
37+
38+
POWER:
39+
* Added optimized support for POWER10
40+
* Added support for compiling for POWER8 in 32bit mode
41+
* Added support for compilation with LLVM/clang
42+
* Added support for compilation with NVIDIA/PGI compilers
43+
* Fixed building on big-endian POWER8
44+
* Fixed miscompilation of ZDOTC by gcc10
45+
* Fixed alignment errors in the POWER8 SAXPY kernel
46+
* Improved CPU detection on AIX
47+
* Supported building with older compilers on POWER9
48+
49+
x86_64:
50+
* Added support for Intel Cooperlake
51+
* Added autodetection of AMD Renoir/Matisse/Zen3 cpus
52+
* Added autodetection of Intel Comet Lake cpus
53+
* Reimplemented ?sum, ?dot and daxpy using universal intrinsics
54+
* Reset the fpu state before using the fpu on Windows as a workaround
55+
for a problem introduced in Windows 10 build 19041 (a.k.a. SDK 2004)
56+
* Fixed potentially undefined behaviour in the dot and gemv_t kernels
57+
* Fixed a potential segmentation fault in DYNAMIC_ARCH builds
58+
* Fixed building for ZEN with PGI/NVIDIA and AMD AOCC compilers
59+
60+
ARMV7:
61+
* Fixed cpu detection on BSD-like systems
62+
63+
ARMV8:
64+
* Added preliminary support for Apple Vortex cpus
65+
* Added support for the Cavium ThunderX3T110 cpu
66+
* Fixed cpu detection on BSD-like systems
67+
* Fixed compilation in -std=C18 mode
68+
69+
70+
IBM Z:
71+
* Added support for compiling with the clang compiler
72+
* Improved GEMM performance on Z14
73+
274
====================================================================
375
Version 0.3.10
476
14-Jun-2020

Jenkinsfile

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
node {
2+
stage('Checkout') {
3+
checkout
4+
}
5+
6+
stage('Build') {
7+
sh("make")
8+
}
9+
}

0 commit comments

Comments
 (0)