Skip to content

Commit fab7462

Browse files
authored
Merge pull request #3304 from xianyi/develop
Merge develop into 0.3.0 for release 0.3.16
2 parents 4c81d1c + 847607c commit fab7462

File tree

129 files changed

+4334
-2351
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

129 files changed

+4334
-2351
lines changed

.github/workflows/nightly-Homebrew-build.yml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,6 @@ jobs:
4343
- name: Update Homebrew
4444
if: github.event_name != 'pull_request'
4545
run: brew update || true
46-
47-
- name: unlink installed gcc to allow updating
48-
run: |
49-
brew unlink gcc@8
50-
brew unlink gcc@9
5146

5247
- name: Install prerequisites
5348
run: brew install --fetch-HEAD --HEAD --only-dependencies --keep-tmp openblas

CONTRIBUTORS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,3 +194,6 @@ In chronological order:
194194

195195
* PingTouGe Semiconductor Co., Ltd.
196196
* [2020-10] Add RISC-V Vector (0.7.1) support. Optimize BLAS kernels for Xuantie C910
197+
198+
* River Dillon <[email protected]>
199+
* [2021-07-10] fix compilation with musl libc

Changelog.txt

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,52 @@
11
OpenBLAS ChangeLog
2+
====================================================================
3+
Version 0.3.16
4+
11-Jul-2021
5+
6+
common:
7+
- drastically reduced the stack size requirements for running the LAPACK
8+
testsuite (Reference-LAPACK PR 553)
9+
- fixed spurious test failures in the LAPACK testsuite (Reference-LAPACK
10+
PR 564)
11+
- expressly setting DYNAMIC_ARCH=0 no longer enables dynamic_arch mode
12+
- improved performance of xGER, xSPR, xSPR2, xSYR, xSYR2, xTRSV, SGEMV_N
13+
and DGEMV_N, for small input sizes and consecutive arguments
14+
- improved performance of xGETRF, xPOTRF and xPOTRI for small input sizes
15+
by disabling multithreading
16+
- fixed installing with BSD versions of the "install" utility
17+
18+
RISCV:
19+
- fixed the implementation of xIMIN
20+
- improved the performance of DSDOT
21+
- fixed linking of the tests on C910V with current vendor gcc
22+
23+
POWER:
24+
- fixed SBGEMM computation for some odd value inputs
25+
- fixed compilation for PPCG4, PPC970, POWER3, POWER4 and POWER5
26+
27+
x86_64:
28+
- improved performance of SGEMV_N and SGEMV_T for small N on AVX512-capable cpus
29+
- worked around a miscompilation of ZGEMM/ZTRMM on Sandybridge with old gcc
30+
versions
31+
- fixed compilation with MS Visual Studio versions older than 2017
32+
- fixed macro name collision with winnt.h from the latest Win10 SDK
33+
- added cpu type autodetection for Intel Ice Lake SP
34+
- fixed cpu type autodetection for Intel Tiger Lake
35+
- added cpu type autodetection for recent Centaur/Zhaoxin models
36+
- fixed compilation with musl libc
37+
38+
ARM64:
39+
- fixed compilation with gcc/gfortran on the Apple M1
40+
- fixed linking of the tests on FreeBSD
41+
- fixed missing restore of a register in the recently rewritten DNRM2 kernel
42+
for ThunderX2 and Neoverse N1 that could cause spurious failures in e.g.
43+
DGEEV
44+
- added compiler optimization flags for the EMAG8180
45+
- added initial support for Cortex A55
46+
47+
ARM:
48+
- fixed linking of the tests on FreeBSD
49+
250
====================================================================
351
Version 0.3.15
452
2-May-2021

Makefile.arm64

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,28 @@ endif
5757
endif
5858
endif
5959

60+
# Fall back to a53 tunings on older compilers because -mtune=cortex-a55 is only available in GCC>=8.1
61+
ifeq ($(CORE), CORTEXA55)
62+
ifeq ($(GCCVERSIONGTEQ7), 1)
63+
ifeq ($(GCCVERSIONGTEQ8), 1)
64+
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55
65+
ifneq ($(F_COMPILER), NAG)
66+
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55
67+
endif
68+
else
69+
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a53
70+
ifneq ($(F_COMPILER), NAG)
71+
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a53
72+
endif
73+
endif
74+
else
75+
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
76+
ifneq ($(F_COMPILER), NAG)
77+
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
78+
endif
79+
endif
80+
endif
81+
6082
ifeq ($(CORE), THUNDERX)
6183
CCOMMON_OPT += -march=armv8-a -mtune=thunderx
6284
ifneq ($(F_COMPILER), NAG)
@@ -107,4 +129,13 @@ FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
107129
endif
108130
endif
109131
endif
132+
133+
ifeq ($(GCCVERSIONGTEQ9), 1)
134+
ifeq ($(CORE), EMAG8180)
135+
CCOMMON_OPT += -march=armv8-a -mtune=emag
136+
ifneq ($(F_COMPILER), NAG)
137+
FCOMMON_OPT += -march=armv8-a -mtune=emag
138+
endif
139+
endif
140+
endif
110141
endif

Makefile.install

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -74,25 +74,25 @@ endif
7474
ifneq ($(OSNAME), AIX)
7575
ifndef NO_LAPACKE
7676
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
77-
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapack.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapack.h"
78-
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
79-
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h"
80-
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h"
81-
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h"
77+
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapack.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapack.h"
78+
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
79+
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h"
80+
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h"
81+
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h"
8282
endif
8383

8484
#for install static library
8585
ifneq ($(NO_STATIC),1)
8686
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
87-
@install -pm644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
87+
@install -m644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
8888
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
8989
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
9090
endif
9191
#for install shared library
9292
ifneq ($(NO_SHARED),1)
9393
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
9494
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku FreeBSD DragonFly))
95-
@install -pm755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
95+
@install -m755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
9696
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
9797
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
9898
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)

Makefile.system

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,7 @@ GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
333333
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
334334
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
335335
GCCVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 7)
336+
GCCVERSIONGTEQ8 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 8)
336337
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
337338
GCCVERSIONGTEQ11 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 11)
338339
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10)
@@ -380,6 +381,12 @@ ifeq ($(OSNAME), AIX)
380381
EXTRALIB += -lm
381382
endif
382383

384+
ifeq ($(OSNAME), FreeBSD)
385+
ifeq ($(ARCH), $(filter $(ARCH),arm arm64))
386+
EXTRALIB += -lm
387+
endif
388+
endif
389+
383390
ifeq ($(OSNAME), WINNT)
384391
NEED_PIC = 0
385392
NO_EXPRECISION = 1
@@ -619,6 +626,7 @@ DYNAMIC_CORE += CORTEXA57
619626
DYNAMIC_CORE += CORTEXA72
620627
DYNAMIC_CORE += CORTEXA73
621628
DYNAMIC_CORE += NEOVERSEN1
629+
DYNAMIC_CORE += CORTEXA55
622630
DYNAMIC_CORE += FALKOR
623631
DYNAMIC_CORE += THUNDERX
624632
DYNAMIC_CORE += THUNDERX2T99

Makefile.x86

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# COMPILER_PREFIX = mingw32-
22

3-
ifndef DYNAMIC_ARCH
3+
ifneq ($(DYNAMIC_ARCH),1)
44
ADD_CPUFLAGS = 1
55
else
66
ifdef TARGET_CORE

Makefile.x86_64

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ endif
99
endif
1010

1111

12-
ifndef DYNAMIC_ARCH
12+
ifneq ($(DYNAMIC_ARCH),1)
1313
ADD_CPUFLAGS = 1
1414
else
1515
ifdef TARGET_CORE

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ We provide official binary packages for the following platform:
2727

2828
* Windows x86/x86_64
2929

30-
You can download them from [file hosting on sourceforge.net](https://sourceforge.net/projects/openblas/files/).
30+
You can download them from [file hosting on sourceforge.net](https://sourceforge.net/projects/openblas/files/) or from the Releases section of the GitHub project page, [https://github.com/xianyi/OpenBLAS/releases](https://github.com/xianyi/OpenBLAS/releases).
3131

3232
## Installation from Source
3333

TargetList.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ CORTEXA57
9292
CORTEXA72
9393
CORTEXA73
9494
NEOVERSEN1
95+
CORTEXA55
9596
EMAG8180
9697
FALKOR
9798
THUNDERX

0 commit comments

Comments
 (0)