Skip to content

Commit 63b03ef

Browse files
authored
Merge pull request #2667 from xianyi/develop
Merge develop into 0.3.0 for 0.3.10 release
2 parents 33f76a6 + 95dbeff commit 63b03ef

File tree

196 files changed

+20992
-4085
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

196 files changed

+20992
-4085
lines changed

.drone.yml

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ platform:
88

99
steps:
1010
- name: Build and Test
11-
image: ubuntu:19.04
11+
image: ubuntu:18.04
1212
environment:
1313
CC: gcc
1414
COMMON_FLAGS: 'DYNAMIC_ARCH=1 TARGET=ARMV8 NUM_THREADS=32'
@@ -32,7 +32,7 @@ platform:
3232

3333
steps:
3434
- name: Build and Test
35-
image: ubuntu:19.04
35+
image: ubuntu:18.04
3636
environment:
3737
CC: gcc
3838
COMMON_FLAGS: 'DYNAMIC_ARCH=1 TARGET=ARMV6 NUM_THREADS=32'
@@ -141,3 +141,52 @@ steps:
141141
- cmake $CMAKE_FLAGS ..
142142
- make -j
143143
- ctest -V
144+
145+
---
146+
kind: pipeline
147+
name: arm64_native_test
148+
149+
platform:
150+
os: linux
151+
arch: arm64
152+
153+
steps:
154+
- name: Build and Test
155+
image: ubuntu:18.04
156+
environment:
157+
CC: gcc
158+
COMMON_FLAGS: 'USE_OPENMP=1'
159+
commands:
160+
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
161+
- apt-get update -y
162+
- apt-get install -y make $CC gfortran perl python g++
163+
- $CC --version
164+
- make QUIET_MAKE=1 $COMMON_FLAGS
165+
- make -C test $COMMON_FLAGS
166+
- make -C ctest $COMMON_FLAGS
167+
- make -C utest $COMMON_FLAGS
168+
- make -C cpp_thread_test dgemm_tester
169+
---
170+
kind: pipeline
171+
name: epyc_native_test
172+
173+
platform:
174+
os: linux
175+
arch: amd64
176+
177+
steps:
178+
- name: Build and Test
179+
image: ubuntu:18.04
180+
environment:
181+
CC: gcc
182+
COMMON_FLAGS: 'USE_OPENMP=1'
183+
commands:
184+
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
185+
- apt-get update -y
186+
- apt-get install -y make $CC gfortran perl python g++
187+
- $CC --version
188+
- make QUIET_MAKE=1 $COMMON_FLAGS
189+
- make -C test $COMMON_FLAGS
190+
- make -C ctest $COMMON_FLAGS
191+
- make -C utest $COMMON_FLAGS
192+
- make -C cpp_thread_test dgemm_tester

.github/workflows/dynamic_arch.yml

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
name: continuous build
2+
3+
on: [push, pull_request]
4+
5+
jobs:
6+
build:
7+
runs-on: ${{ matrix.os }}
8+
strategy:
9+
fail-fast: false
10+
matrix:
11+
os: [ubuntu-latest, macos-latest]
12+
build: [cmake, make]
13+
steps:
14+
- name: Checkout repository
15+
uses: actions/checkout@v2
16+
17+
- name: Compilation cache
18+
uses: actions/cache@v2
19+
with:
20+
path: ~/.ccache
21+
# We include the commit sha in the cache key, as new cache entries are
22+
# only created if there is no existing entry for the key yet.
23+
key: ${{ runner.os }}-ccache-${{ github.sha }}
24+
# Restore any ccache cache entry, if none for
25+
# ${{ runner.os }}-ccache-${{ github.sha }} exists
26+
restore-keys: |
27+
${{ runner.os }}-ccache
28+
29+
- name: Print system information
30+
run: |
31+
if [ "$RUNNER_OS" == "Linux" ]; then
32+
cat /proc/cpuinfo
33+
elif [ "$RUNNER_OS" == "macOS" ]; then
34+
sysctl -a | grep machdep.cpu
35+
else
36+
echo "$RUNNER_OS not supported"
37+
exit 1
38+
fi
39+
40+
- name: Install Dependencies
41+
run: |
42+
if [ "$RUNNER_OS" == "Linux" ]; then
43+
sudo apt-get install -y gfortran cmake ccache
44+
elif [ "$RUNNER_OS" == "macOS" ]; then
45+
brew install coreutils cmake ccache
46+
else
47+
echo "$RUNNER_OS not supported"
48+
exit 1
49+
fi
50+
ccache -M 300M # Limit the ccache size; Github's overall cache limit is 5GB
51+
52+
- name: Build
53+
if: matrix.build == 'make'
54+
run: |
55+
if [ "$RUNNER_OS" == "Linux" ]; then
56+
export PATH="/usr/lib/ccache:${PATH}"
57+
elif [ "$RUNNER_OS" == "macOS" ]; then
58+
export PATH="$(brew --prefix)/opt/ccache/libexec:${PATH}"
59+
else
60+
echo "$RUNNER_OS not supported"
61+
exit 1
62+
fi
63+
64+
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0
65+
66+
- name: CMake build
67+
if: matrix.build == 'cmake'
68+
run: |
69+
if [ "$RUNNER_OS" == "Linux" ]; then
70+
export PATH="/usr/lib/ccache:${PATH}"
71+
elif [ "$RUNNER_OS" == "macOS" ]; then
72+
export PATH="$(brew --prefix)/opt/ccache/libexec:${PATH}"
73+
else
74+
echo "$RUNNER_OS not supported"
75+
exit 1
76+
fi
77+
78+
mkdir build
79+
cd build
80+
cmake -DDYNAMIC_ARCH=1 -DNOFORTRAN=0 -DBUILD_WITHOUT_LAPACK=0 -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_BUILD_TYPE=Release ..
81+
make -j$(nproc)

.github/workflows/nightly-Homebrew-build.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ jobs:
2121
build-OpenBLAS-with-Homebrew:
2222
runs-on: macos-latest
2323
env:
24+
DEVELOPER_DIR: /Applications/Xcode_11.4.1.app/Contents/Developer
2425
HOMEBREW_DEVELOPER: "ON"
2526
HOMEBREW_DISPLAY_INSTALL_TIMES: "ON"
2627
HOMEBREW_NO_ANALYTICS: "ON"

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ test/SBLAT2.SUMM
7070
test/SBLAT3.SUMM
7171
test/ZBLAT2.SUMM
7272
test/ZBLAT3.SUMM
73+
test/SHBLAT3.SUMM
7374
test/cblat1
7475
test/cblat2
7576
test/cblat3
@@ -79,6 +80,7 @@ test/dblat3
7980
test/sblat1
8081
test/sblat2
8182
test/sblat3
83+
test/test_shgemm
8284
test/zblat1
8385
test/zblat2
8486
test/zblat3

.travis.yml

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ matrix:
1616
before_script: &common-before
1717
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
1818
script:
19-
- set -e
2019
- make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
2120
- make -C test $COMMON_FLAGS $BTYPE
2221
- make -C ctest $COMMON_FLAGS $BTYPE
@@ -34,6 +33,16 @@ matrix:
3433
- TARGET_BOX=PPC64LE_LINUX
3534
- BTYPE="BINARY=64 USE_OPENMP=1"
3635

36+
- <<: *test-ubuntu
37+
os: linux
38+
arch: s390x
39+
before_script:
40+
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=Z13 NUM_THREADS=32"
41+
env:
42+
# for matrix annotation only
43+
- TARGET_BOX=IBMZ_LINUX
44+
- BTYPE="BINARY=64 USE_OPENMP=1"
45+
3746
- <<: *test-ubuntu
3847
env:
3948
- TARGET_BOX=LINUX64
@@ -98,7 +107,6 @@ matrix:
98107
- sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers'
99108
before_script: *common-before
100109
script:
101-
- set -e
102110
# XXX: Disable some warnings for now to avoid exceeding Travis limit for log size.
103111
- alpine make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
104112
CFLAGS="-Wno-misleading-indentation -Wno-sign-conversion -Wno-incompatible-pointer-types"
@@ -141,7 +149,6 @@ matrix:
141149
before_script:
142150
- COMMON_ARGS="-DTARGET=NEHALEM -DNUM_THREADS=32"
143151
script:
144-
- set -e
145152
- mkdir build
146153
- CONFIG=Release
147154
- cmake -Bbuild -H. $CMAKE_ARGS $COMMON_ARGS -DCMAKE_BUILD_TYPE=$CONFIG
@@ -180,6 +187,12 @@ matrix:
180187
- CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch arm64 -miphoneos-version-min=10.0"
181188
- BTYPE="TARGET=ARMV8 BINARY=64 HOSTCC=clang NOFORTRAN=1"
182189

190+
- <<: *test-macos
191+
osx_image: xcode10.1
192+
env:
193+
- CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
194+
- CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch armv7 -miphoneos-version-min=5.1"
195+
- BTYPE="TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1"
183196
# whitelist
184197
branches:
185198
only:

CMakeLists.txt

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ cmake_minimum_required(VERSION 2.8.5)
66
project(OpenBLAS C ASM)
77
set(OpenBLAS_MAJOR_VERSION 0)
88
set(OpenBLAS_MINOR_VERSION 3)
9-
set(OpenBLAS_PATCH_VERSION 9)
9+
set(OpenBLAS_PATCH_VERSION 10)
10+
1011
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
1112

1213
# Adhere to GNU filesystem layout conventions
@@ -23,6 +24,7 @@ option(BUILD_WITHOUT_CBLAS "Do not build the C interface (CBLAS) to the BLAS fun
2324
option(DYNAMIC_ARCH "Include support for multiple CPU targets, with automatic selection at runtime (x86/x86_64, aarch64 or ppc only)" OFF)
2425
option(DYNAMIC_OLDER "Include specific support for older x86 cpu models (Penryn,Dunnington,Atom,Nano,Opteron) with DYNAMIC_ARCH" OFF)
2526
option(BUILD_RELAPACK "Build with ReLAPACK (recursive implementation of several LAPACK functions on top of standard LAPACK)" OFF)
27+
option(USE_LOCKING "Use locks even in single-threaded builds to make them callable from multiple threads" OFF)
2628
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
2729
option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding processes from e.g. R or numpy/scipy to a single core" ON)
2830
else()
@@ -86,9 +88,13 @@ if (NOT NO_LAPACK)
8688
list(APPEND SUBDIRS lapack)
8789
endif ()
8890

91+
if (NOT DEFINED BUILD_HALF)
92+
set (BUILD_HALF false)
93+
endif ()
8994
# set which float types we want to build for
9095
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
9196
# if none are defined, build for all
97+
# set(BUILD_HALF true)
9298
set(BUILD_SINGLE true)
9399
set(BUILD_DOUBLE true)
94100
set(BUILD_COMPLEX true)
@@ -120,6 +126,11 @@ if (BUILD_COMPLEX16)
120126
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
121127
endif ()
122128

129+
if (BUILD_HALF)
130+
message(STATUS "Building Half Precision")
131+
list(APPEND FLOAT_TYPES "HALF") # defines nothing
132+
endif ()
133+
123134
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
124135
message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.")
125136
endif ()
@@ -234,7 +245,7 @@ if (BUILD_SHARED_LIBS AND BUILD_RELAPACK)
234245
if (NOT MSVC)
235246
target_link_libraries(${OpenBLAS_LIBNAME} "-Wl,-allow-multiple-definition")
236247
else()
237-
target_link_libraries(${OpenBLAS_LIBNAME} "/FORCE:MULTIPLE")
248+
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /FORCE:MULTIPLE")
238249
endif()
239250
endif()
240251

CONTRIBUTORS.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,3 +180,13 @@ In chronological order:
180180
* [2019-12-23] optimize AVX2 CGEMM and ZGEMM
181181
* [2019-12-30] AVX2 CGEMM3M & ZGEMM3M kernels
182182
* [2020-01-07] optimize AVX2 SGEMM and STRMM
183+
184+
* Rajalakshmi Srinivasaraghavan <https://github.com/RajalakshmiSR>
185+
* [2020-04-15] Half-precision GEMM for bfloat16
186+
187+
* Marius Hillenbrand <https://github.com/mhillenibm>
188+
* [2020-05-12] Revise dynamic architecture detection for IBM z
189+
* [2020-05-12] Add new sgemm and strmm kernel for IBM z14
190+
191+
* Danfeng Zhang <https://github.com/craft-zhang>
192+
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53

Changelog.txt

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,77 @@
11
OpenBLAS ChangeLog
2+
====================================================================
3+
Version 0.3.10
4+
14-Jun-2020
5+
6+
common:
7+
* Improved thread locking behaviour in blas_server and parallel getrf
8+
* Imported bugfix 394 from LAPACK (spurious reference to "XERBL"
9+
due to overlong lines)
10+
* Imported bugfix 403 from LAPACK (compile option "recursive" required
11+
for correctness with Intel and PGI)
12+
* Imported bugfix 408 from LAPACK (wrong scaling in ZHEEQUB)
13+
* Imported bugfix 411 from LAPACK (infinite loop in LARGV/LARTG/LARTGP)
14+
* Fixed mismatches between BUFFERSIZE and GEMM_UNROLL parameters that
15+
could lead to crashes at large matrix sizes
16+
* Restored internal soname in dynamic libraries on FreeBSD and Dragonfly
17+
* Added API (openblas_setaffinity) to set the thread affinity on Linux
18+
* Added initial infrastructure for half-precision floating point
19+
(bfloat16) support with a generic implementation of SHGEMM
20+
* Added CMAKE build system support for building the cblas_Xgemm3m
21+
functions
22+
* Fixed CMAKE support for building in a path with embedded spaces
23+
* Fixed CMAKE (non)handling of NO_EXPRECISION and MAX_STACK_ALLOC
24+
* Fixed GCC version detection in the Makefiles
25+
* Allowed overriding the names of AR, AS and LD in Makefile builds
26+
27+
POWER:
28+
* Fixed big-endian POWER8 ELFv2 builds on FreeBSD
29+
* Fixed GCC version checks and DYNAMIC_ARCH builds on POWER9
30+
* Fixed CMAKE build support for POWER9
31+
* Fixed a potential race condition in the thread buffer allocation
32+
* Worked around LAPACK test failures on PPC G4
33+
34+
MIPS:
35+
* Fixed a potential race condition in the thread buffer allocation
36+
* Added support for MIPS 24K/24KE family based on P5600 kernels
37+
38+
MIPS64:
39+
* Fixed a potential race condition in the thread buffer allocation
40+
* Added TARGET=GENERIC
41+
42+
ARMV7:
43+
* Fixed a race condition in the thread buffer allocation
44+
45+
ARMV8:
46+
* Fixed a race condition in the thread buffer allocation
47+
* Fixed zero initialisation in the assembly for SGEMM and DGEMM BETA
48+
* Improved performance of the ThunderX2 DAXPY kernel
49+
* Added an optimized SGEMM kernel for Cortex A53
50+
* Fixed Makefile support for INTERFACE64 (8-byte integer)
51+
52+
x86_64:
53+
* Fixed a syntax error in the CMAKE setup for SkylakeX
54+
* Improved performance of STRSM on Haswell, SkylakeX and Ryzen
55+
* Improved SGEMM performance for workloads with ldc a
56+
multiple of 1024
57+
* Improved DGEMM performance on Skylake X
58+
* Fixed unwanted AVX512-dependency of SGEMM in DYNAMIC_ARCH
59+
builds created on SkylakeX
60+
* Removed data alignment requirement in the SSE2 copy kernels
61+
that could cause spurious crashes
62+
* Added a workaround for an optimizer bug in AppleClang 11.0.3
63+
* Fixed LAPACK test failures due to wrong options for Intel Fortran
64+
* Fixed compilation and LAPACK test results with recent Flang
65+
and AMD AOCC
66+
* Fixed DYNAMIC_ARCH builds with CMAKE on OS X
67+
* Fixed missing exports of cblas_i?amin, cblas_i?min, cblas_i?max,
68+
cblas_?sum, cblas_?gemm3m in the shared library on OS X
69+
* Fixed reporting of cpu name in DYNAMIC_ARCH builds (would sometimes
70+
show the name of an older generation chip supported by the same kernels)
71+
72+
IBM Z:
73+
* Improved performance of SGEMM/STRMM and DGEMM/DTRMM on Z14
74+
275
====================================================================
376
Version 0.3.9
477
1-Mar-2020

0 commit comments

Comments
 (0)