Skip to content

Commit 89eea6b

Browse files
authored
Merge pull request #102 from xianyi/develop
rebase
2 parents 25907e6 + 0ac6102 commit 89eea6b

File tree

4 files changed

+78
-2
lines changed

4 files changed

+78
-2
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
66
project(OpenBLAS C ASM)
77
set(OpenBLAS_MAJOR_VERSION 0)
88
set(OpenBLAS_MINOR_VERSION 3)
9-
set(OpenBLAS_PATCH_VERSION 10.dev)
9+
set(OpenBLAS_PATCH_VERSION 11.dev)
1010
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
1111

1212
# Adhere to GNU filesystem layout conventions

Changelog.txt

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,76 @@
11
OpenBLAS ChangeLog
2+
====================================================================
3+
Version 0.3.11
4+
17-Oct-2020
5+
6+
common:
7+
* API change:
8+
the newly added BFLOAT16 functions were renamed to use the
9+
letter "B" instead of "H" to avoid potential confusion with
10+
the IEEE "half precision float" type, i.e. the 0.3.10
11+
SHGEMM is now SBGEMM and the corresponding build option
12+
was changed from "BUILD_HALF" to "BUILD_BFLOAT16".
13+
* Reduced the default BLAS3_MEM_ALLOC_THRESHOLD (used as an upper
14+
limit for placing temporary arrays on the stack) to be compatible
15+
with a stack size of 1 MB (as imposed by the Java runtime library)
16+
* Added mixed-precision dot function SBDOT and utility functions
17+
sbstobf16, sbdtobf16, sbf16tos and dbf16tod to convert between
18+
single or double precision float arrays and bfloat16 arrays
19+
* Fixed prototypes of LAPACK_?ggsvp and LAPACK_?ggsvd functions
20+
in lapack.h
21+
* Fixed underflow and rounding errors in LAPACK SLANV2 and DLANV2
22+
(causing miscalculations in e.g. SHSEQR/DHSEQR, LAPACK issue #263)
23+
* Fixed workspace calculation in LAPACK ?GELQ (LAPACK issue #415)
24+
* Fixed several bugs in the LAPACK testsuite
25+
* Improved performance of TRMM and TRSM for certain problem sizes
26+
* Fixed infinite recursions and workspace miscalculations in ReLAPACK
27+
* CMake builds no longer require pkg-config for creating the .pc file
28+
* Makefile builds no longer misread NO_CBLAS=0 or NO_LAPACK=0 as
29+
enabling these options
30+
* Fixed detection of gfortran when invoked through an MPI wrapper
31+
* Improved thread reinitialization performance with OpenMP after a fork
32+
* Added support for building only the subset of the library required
33+
for a particular precision by specifying BUILD_SINGLE, BUILD_DOUBLE
34+
* Optional function name prefixes and suffixes are now correctly
35+
reflected in the generated cblas.h
36+
* Added CMake build support for the LAPACK and multithreading tests
37+
38+
POWER:
39+
* Added optimized support for POWER10
40+
* Added support for compiling for POWER8 in 32bit mode
41+
* Added support for compilation with LLVM/clang
42+
* Added support for compilation with NVIDIA/PGI compilers
43+
* Fixed building on big-endian POWER8
44+
* Fixed miscompilation of ZDOTC by gcc10
45+
* Fixed alignment errors in the POWER8 SAXPY kernel
46+
* Improved CPU detection on AIX
47+
* Added support for building with older compilers on POWER9
48+
49+
x86_64:
50+
* Added support for Intel Cooperlake
51+
* Added autodetection of AMD Renoir/Matisse/Zen3 cpus
52+
* Added autodetection of Intel Comet Lake cpus
53+
* Reimplemented ?sum, ?dot and daxpy using universal intrinsics
54+
* Reset the fpu state before using the fpu on Windows as a workaround
55+
for a problem introduced in Windows 10 build 19041 (a.k.a. SDK 2004)
56+
* Fixed potentially undefined behaviour in the dot and gemv_t kernels
57+
* Fixed a potential segmentation fault in DYNAMIC_ARCH builds
58+
* Fixed building for ZEN with PGI/NVIDIA and AMD AOCC compilers
59+
60+
ARMV7:
61+
* Fixed cpu detection on BSD-like systems
62+
63+
ARMV8:
64+
* Added preliminary support for Apple Vortex cpus
65+
* Added support for the Cavium ThunderX3T110 cpu
66+
* Fixed cpu detection on BSD-like systems
67+
* Fixed compilation in -std=c18 mode
68+
69+
70+
IBM Z:
71+
* Added support for compiling with the clang compiler
72+
* Improved GEMM performance on Z14
73+
274
====================================================================
375
Version 0.3.10
476
14-Jun-2020

Makefile.rule

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44

55
# This library's version
6-
VERSION = 0.3.10.dev
6+
VERSION = 0.3.11.dev
77

88
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
99
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library

Makefile.x86_64

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,10 @@ GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
7878
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11)
7979
CCOMMON_OPT += -mavx2
8080
endif
81+
else
82+
ifeq ($(C_COMPILER), CLANG)
83+
CCOMMON_OPT += -mavx2
84+
endif
8185
endif
8286
ifeq ($(F_COMPILER), GFORTRAN)
8387
# AVX2 support was added in 4.7.0

0 commit comments

Comments
 (0)