Skip to content

Commit 2f6d35c

Browse files
authored
Merge pull request #3150 from xianyi/develop
Update branch from develop for 0.3.14 release
2 parents d2b11c4 + 86de5f7 commit 2f6d35c

File tree

102 files changed

+4895
-433
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

102 files changed

+4895
-433
lines changed

.drone.yml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,3 +190,27 @@ steps:
190190
- make -C ctest $COMMON_FLAGS
191191
- make -C utest $COMMON_FLAGS
192192
- make -C cpp_thread_test dgemm_tester
193+
---
194+
kind: pipeline
195+
name: arm64_gcc10
196+
197+
platform:
198+
os: linux
199+
arch: arm64
200+
201+
steps:
202+
- name: Build and Test
203+
image: ubuntu:20.04
204+
environment:
205+
CC: gcc-10
206+
FC: gfortran-10
207+
COMMON_FLAGS: 'TARGET=ARMV8 DYNAMIC_ARCH=1'
208+
commands:
209+
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
210+
- apt-get update -y
211+
- apt-get install -y make $CC gfortran-10 perl python g++
212+
- $CC --version
213+
- make QUIET_MAKE=1 $COMMON_FLAGS
214+
- make -C utest $COMMON_FLAGS
215+
- make -C test $COMMON_FLAGS
216+

.github/workflows/nightly-Homebrew-build.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@ jobs:
4444
if: github.event_name != 'pull_request'
4545
run: brew update || true
4646

47+
- name: unlink installed gcc to allow updating
48+
run: |
49+
brew unlink gcc@8
50+
brew unlink gcc@9
51+
4752
- name: Install prerequisites
4853
run: brew install --fetch-HEAD --HEAD --only-dependencies --keep-tmp openblas
4954

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,5 +89,7 @@ build.*
8989
*.swp
9090
benchmark/*.goto
9191
benchmark/smallscaling
92+
.vscode
9293
CMakeCache.txt
9394
CMakeFiles/*
95+
.vscode

CMakeLists.txt

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,17 @@ cmake_minimum_required(VERSION 2.8.5)
66
project(OpenBLAS C ASM)
77
set(OpenBLAS_MAJOR_VERSION 0)
88
set(OpenBLAS_MINOR_VERSION 3)
9-
set(OpenBLAS_PATCH_VERSION 13)
9+
set(OpenBLAS_PATCH_VERSION 14)
1010
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
1111

1212
# Adhere to GNU filesystem layout conventions
1313
include(GNUInstallDirs)
1414

1515
include(CMakePackageConfigHelpers)
1616

17+
if(MSVC AND NOT DEFINED NOFORTRAN)
18+
set(NOFORTRAN ON)
19+
endif()
1720

1821
#######
1922
if(MSVC)
@@ -229,7 +232,7 @@ if (NOT NO_CBLAS)
229232
add_subdirectory(utest)
230233
endif()
231234

232-
if (NOT MSVC AND NOT NOFORTRAN)
235+
if (NOT NOFORTRAN)
233236
# Build test and ctest
234237
add_subdirectory(test)
235238
if(NOT NO_CBLAS)

Changelog.txt

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,52 @@
11
OpenBLAS ChangeLog
2+
====================================================================
3+
Version 0.3.14
4+
17-Mar-2021
5+
6+
common:
7+
* Fixed a race condition on thread shutdown in non-OpenMP builds
8+
* Fixed custom BUFFERSIZE option getting ignored in gmake builds
9+
* Fixed CMake compilation of the TRMM kernels for GENERIC platforms
10+
* Added CBLAS interfaces for CROTG, ZROTG, CSROT and ZDROT
11+
* Improved performance of OMATCOPY_RT across all platforms
12+
* Changed the Perl scripts to locate the interpreter via env instead of a hardcoded /usr/bin/perl
13+
* Fixed potential misreading of the GCC compiler version in the build scripts
14+
* Fixed convergence problems in LAPACK complex GGEV/GGES (Reference-LAPACK #477)
15+
* Reduced the stacksize requirements for running the LAPACK testsuite (Reference-LAPACK #335)
16+
17+
RISCV:
18+
* Fixed compilation on RISCV (missing entry in getarch)
19+
20+
POWER:
21+
* Fixed compilation for DYNAMIC_ARCH with clang and with old gcc versions
22+
* Added support for compilation on FreeBSD/ppc64le
23+
* Added optimized POWER10 kernels for SSCAL, DSCAL, CSCAL, ZSCAL
24+
* Added optimized POWER10 kernels for SROT, DROT, CDOT, SASUM, DASUM
25+
* Improved SSWAP, DSWAP, CSWAP, ZSWAP performance on POWER10
26+
* Improved SCOPY and CCOPY performance on POWER10
27+
* Improved SGEMM and DGEMM performance on POWER10
28+
* Added support for compilation with the NVIDIA HPC compiler
29+
30+
x86_64:
31+
* Added an optimized bfloat16 GEMM kernel for Cooperlake
32+
* Added CPUID autodetection for Intel Rocket Lake and Tiger Lake CPUs
33+
* Improved the performance of SASUM, DASUM, SROT and DROT on AMD Ryzen CPUs
34+
* Added support for compilation with the NAG Fortran compiler
35+
* Fixed recognition of the AMD AOCC compiler
36+
* Fixed compilation for DYNAMIC_ARCH with clang on Windows
37+
* Added support for running the BLAS/CBLAS tests on Windows
38+
* Fixed signatures of the TLS callback functions for Windows x64
39+
* Fixed various issues in the handling of FMA intrinsics support
40+
41+
ARM:
42+
* Added support for embedded Cortex M targets via a new option EMBEDDED
43+
44+
ARMV8:
45+
* Fixed the THUNDERX2T99 and NEOVERSEN1 DNRM2/ZNRM2 kernels for inputs with Inf
46+
* Added support for the DYNAMIC_LIST option
47+
* Added support for compilation with the NVIDIA HPC compiler
48+
* Added support for compilation with the NAG Fortran compiler
49+
250
====================================================================
351
Version 0.3.13
452
12-Dec-2020

Makefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@ endif
5959
@$(CC) --version > /dev/null 2>&1;\
6060
if [ $$? -eq 0 ]; then \
6161
cverinfo=`$(CC) --version | sed -n '1p'`; \
62+
if [ -z "$${cverinfo}" ]; then \
63+
cverinfo=`$(CC) --version | sed -n '2p'`; \
64+
fi; \
6265
echo " C compiler ... $(C_COMPILER) (cmd & version : $${cverinfo})";\
6366
else \
6467
echo " C compiler ... $(C_COMPILER) (command line : $(CC))";\
@@ -67,6 +70,9 @@ ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
6770
@$(FC) --version > /dev/null 2>&1;\
6871
if [ $$? -eq 0 ]; then \
6972
fverinfo=`$(FC) --version | sed -n '1p'`; \
73+
if [ -z "$${fverinfo}" ]; then \
74+
fverinfo=`$(FC) --version | sed -n '2p'`; \
75+
fi; \
7076
echo " Fortran compiler ... $(F_COMPILER) (cmd & version : $${fverinfo})";\
7177
else \
7278
echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))";\

Makefile.arm64

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,80 +1,110 @@
1-
1+
ifneq ($(C_COMPILER), PGI)
22
ifeq ($(CORE), ARMV8)
33
CCOMMON_OPT += -march=armv8-a
4+
ifneq ($(F_COMPILER), NAG)
45
FCOMMON_OPT += -march=armv8-a
56
endif
7+
endif
68

79
ifeq ($(CORE), CORTEXA53)
810
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
11+
ifneq ($(F_COMPILER), NAG)
912
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
1013
endif
14+
endif
1115

1216
ifeq ($(CORE), CORTEXA57)
1317
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
18+
ifneq ($(F_COMPILER), NAG)
1419
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
1520
endif
21+
endif
1622

1723
ifeq ($(CORE), CORTEXA72)
1824
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
25+
ifneq ($(F_COMPILER), NAG)
1926
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
2027
endif
28+
endif
2129

2230
ifeq ($(CORE), CORTEXA73)
2331
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
32+
ifneq ($(F_COMPILER), NAG)
2433
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
2534
endif
35+
endif
2636

2737
# Fall back to Cortex-A72 tunings because -mtune=neoverse-n1 is only available
2838
# in GCC>=9
2939
ifeq ($(CORE), NEOVERSEN1)
3040
ifeq ($(GCCVERSIONGTEQ7), 1)
3141
ifeq ($(GCCVERSIONGTEQ9), 1)
3242
CCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
43+
ifneq ($(F_COMPILER), NAG)
3344
FCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
45+
endif
3446
else
3547
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
48+
ifneq ($(F_COMPILER), NAG)
3649
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
3750
endif
51+
endif
3852
else
3953
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
54+
ifneq ($(F_COMPILER), NAG)
4055
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
4156
endif
4257
endif
58+
endif
4359

4460
ifeq ($(CORE), THUNDERX)
4561
CCOMMON_OPT += -march=armv8-a -mtune=thunderx
62+
ifneq ($(F_COMPILER), NAG)
4663
FCOMMON_OPT += -march=armv8-a -mtune=thunderx
4764
endif
65+
endif
4866

4967
ifeq ($(CORE), FALKOR)
5068
CCOMMON_OPT += -march=armv8-a -mtune=falkor
69+
ifneq ($(F_COMPILER), NAG)
5170
FCOMMON_OPT += -march=armv8-a -mtune=falkor
5271
endif
72+
endif
5373

5474
ifeq ($(CORE), THUNDERX2T99)
5575
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
76+
ifneq ($(F_COMPILER), NAG)
5677
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
5778
endif
79+
endif
5880

5981
ifeq ($(CORE), THUNDERX3T110)
6082
ifeq ($(GCCVERSIONGTEQ10), 1)
6183
CCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
84+
ifneq ($(F_COMPILER), NAG)
6285
FCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
86+
endif
6387
else
6488
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
89+
ifneq ($(F_COMPILER), NAG)
6590
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
6691
endif
6792
endif
93+
endif
6894

6995
ifeq ($(CORE), VORTEX)
7096
CCOMMON_OPT += -march=armv8.3-a
97+
ifneq ($(F_COMPILER), NAG)
7198
FCOMMON_OPT += -march=armv8.3-a
7299
endif
100+
endif
73101

74102
ifeq ($(GCCVERSIONGTEQ9), 1)
75103
ifeq ($(CORE), TSV110)
76104
CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
105+
ifneq ($(F_COMPILER), NAG)
77106
FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
78107
endif
79108
endif
80-
109+
endif
110+
endif

Makefile.power

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,11 @@ USE_OPENMP = 1
1010
endif
1111

1212
ifeq ($(CORE), POWER10)
13+
ifneq ($(C_COMPILER), PGI)
1314
CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
1415
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
1516
endif
17+
endif
1618

1719
ifeq ($(CORE), POWER9)
1820
ifneq ($(C_COMPILER), PGI)

Makefile.rule

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44

55
# This library's version
6-
VERSION = 0.3.13
6+
VERSION = 0.3.14
77

88
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
99
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library

0 commit comments

Comments
 (0)