Skip to content

Commit c5f8aef

Browse files
authored
Merge branch 'develop' into fbsd12
2 parents 44c81fd + 8278cbe commit c5f8aef

File tree

13 files changed

+126
-34
lines changed

13 files changed

+126
-34
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
66
project(OpenBLAS C ASM)
77
set(OpenBLAS_MAJOR_VERSION 0)
88
set(OpenBLAS_MINOR_VERSION 3)
9-
set(OpenBLAS_PATCH_VERSION 4.dev)
9+
set(OpenBLAS_PATCH_VERSION 5.dev)
1010
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
1111

1212
# Adhere to GNU filesystem layout conventions

Changelog.txt

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,77 @@
11
OpenBLAS ChangeLog
2+
====================================================================
3+
Version 0.3.4
4+
02-Dec-2018
5+
6+
common:
7+
* the new, experimental thread-local memory allocation had
8+
inadvertently been left enabled for gmake builds in 0.3.3
9+
despite the announcement. It is now disabled by default, and
10+
single-threaded builds will keep using the old allocator even
11+
if the USE_TLS option is turned on.
12+
* OpenBLAS will now provide enough buffer space for at least 50
13+
threads by default.
14+
* The output of openblas_get_config() now contains the version
15+
number.
16+
* A serious thread safety bug in GEMV operation with small M and
17+
large N size has been fixed.
18+
* The code will now automatically call blas_thread_init after a
19+
fork if needed before handling a call to openblas_set_num_threads
20+
* Accesses to parallelized level3 functions from multiple callers
21+
are now serialized to avoid thread races (unless using OpenMP).
22+
This should provide better performance than the known-threadsafe
23+
(but non-default) USE_SIMPLE_THREADED_LEVEL3 option.
24+
* When building LAPACK with gfortran, -frecursive is now (again)
25+
enabled by default to ensure correct behaviour.
26+
* The OpenBLAS version of cblas.h now supports both CBLAS_ORDER and
27+
CBLAS_LAYOUT as the name of the matrix row/column order option.
28+
* Externally set LDFLAGS are now passed through to the final compile/link
29+
steps to facilitate setting platform-specific linker flags.
30+
* A potential race condition during the build of LAPACK (that would
31+
usually manifest itself as a failure to build TESTING/MATGEN) has been
32+
fixed.
33+
* xHEMV has been changed to stay single-threaded for small input sizes
34+
where the overhead of multithreading exceeds any possible gains
35+
* CSWAP and ZSWAP have been limited to a single thread except on ARMV8 or
36+
ThunderX hardware with sizable input.
37+
* Linker flags for the PGI compiler have been updated
38+
* Behaviour of AXPY with zero increments is now handled in the C interface,
39+
correcting the result on at least Intel Atom.
40+
* The result matrix from calling SGELSS with an all-zero input matrix is
41+
now zeroed completely.
42+
43+
x86_64:
44+
* Autodetection of AMD Ryzen2 has been fixed (again).
45+
* CMAKE builds now support labeling of an INTERFACE64=1 build of
46+
the library with the _64 suffix.
47+
* AVX512 version of DGEMM has been added and the AVX512 SGEMM kernel
48+
has been sped up by rewriting with C intrinsics
49+
* Fixed compilation on RHEL5/CENTOS5 (issue with typename __WAIT_STATUS)
50+
51+
POWER:
52+
* added support for building on AIX (with gcc and GNU tools from AIX Toolbox).
53+
* CPU type detection has been implemented for AIX.
54+
* CPU type detection has been fixed for NETBSD.
55+
56+
MIPS64:
57+
* AXPY on LOONGSON3A has been corrected to pass "zero increment" utest.
58+
* DSDOT on LOONGSON3A has been fixed.
59+
* the SGEMM microkernel has been hardened against potential data loss.
60+
61+
ARMV8:
62+
* DYNAMIC_ARCH support is now available for 64bit ARM
63+
* cross-compiling for ARMV8 under iOS now works.
64+
* cpu-specific code has been rearranged to make better use of both
65+
hardware commonalities and model-specific compiler optimizations.
66+
* XGENE1 has been removed as a TARGET, superseded by the improved generic
67+
ARMV8 support.
68+
69+
ARMV7:
70+
* Older assembly mnemonics have been converted to UAL form to allow
71+
building with clang 7.0
72+
* Cross compiling LAPACKE for Android has been fixed again (broken by
73+
update to LAPACK 3.7.0 some while ago).
74+
275
====================================================================
376
Version 0.3.3
477
31-Aug-2018

Makefile.rule

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44

55
# This library's version
6-
VERSION = 0.3.4.dev
6+
VERSION = 0.3.5.dev
77

88
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
99
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library

Makefile.system

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ else ifeq ($(ARCH), i386)
1818
override ARCH=x86
1919
else ifeq ($(ARCH), aarch64)
2020
override ARCH=arm64
21-
endif
21+
endif
2222

2323
NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib
2424

@@ -1042,6 +1042,8 @@ ifdef USE_TLS
10421042
CCOMMON_OPT += -DUSE_TLS
10431043
endif
10441044

1045+
CCOMMON_OPT += -DVERSION=\"$(VERSION)\"
1046+
10451047
ifndef SYMBOLPREFIX
10461048
SYMBOLPREFIX =
10471049
endif

cmake/system.cmake

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,8 @@ if (MIXED_MEMORY_ALLOCATION)
310310
set(CCOMMON_OPT "${CCOMMON_OPT} -DMIXED_MEMORY_ALLOCATION")
311311
endif ()
312312

313+
set(CCOMMON_OPT "${CCOMMON_OPT} -DVERSION=\"\\\"${OpenBLAS_VERSION}\\\"\"")
314+
313315
set(REVISION "-r${OpenBLAS_VERSION}")
314316
set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION})
315317

cmake/system_check.cmake

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,16 @@ if (${HOST_OS} STREQUAL "WINDOWS")
1010
set(HOST_OS WINNT)
1111
endif ()
1212

13+
if ("${HOST_OS}" STREQUAL "LINUX")
14+
  # Check if we're building natively on Android (TERMUX): a host that looks like Linux is reclassified as ANDROID when the `uname -o` output contains "Android".
15+
  execute_process(COMMAND uname -o OUTPUT_VARIABLE OPERATING_SYSTEM OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET)
16+
  if ("${OPERATING_SYSTEM}" MATCHES "Android")  # quoted so the test stays well-formed when uname produced no output
17+
    set(HOST_OS ANDROID)
18+
  endif ()
19+
endif ()
20+
21+
22+
1323
if(CMAKE_COMPILER_IS_GNUCC AND WIN32)
1424
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpmachine
1525
OUTPUT_VARIABLE OPENBLAS_GCC_TARGET_MACHINE

cpuid_power.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,9 +175,9 @@ int detect(void){
175175
return CPUTYPE_PPC970;
176176
#endif
177177

178-
#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)
178+
#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__)
179179
int id;
180-
id = __asm __volatile("mfpvr %0" : "=r"(id));
180+
__asm __volatile("mfpvr %0" : "=r"(id));
181181
switch ( id >> 16 ) {
182182
case 0x4e: // POWER9
183183
return CPUTYPE_POWER8;

driver/others/memory.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2586,7 +2586,7 @@ void *blas_memory_alloc(int procpos){
25862586
printf("Alloc Start ...\n");
25872587
#endif
25882588

2589-
#if defined(WHEREAMI) && !defined(USE_OPENMP)
2589+
/* #if defined(WHEREAMI) && !defined(USE_OPENMP)
25902590
25912591
mypos = WhereAmI();
25922592
@@ -2596,20 +2596,20 @@ void *blas_memory_alloc(int procpos){
25962596
do {
25972597
if (!memory[position].used && (memory[position].pos == mypos)) {
25982598
LOCK_COMMAND(&alloc_lock);
2599-
/* blas_lock(&memory[position].lock);*/
2599+
// blas_lock(&memory[position].lock);
26002600
26012601
if (!memory[position].used) goto allocation;
26022602
26032603
UNLOCK_COMMAND(&alloc_lock);
2604-
/* blas_unlock(&memory[position].lock);*/
2604+
// blas_unlock(&memory[position].lock);
26052605
}
26062606
26072607
position ++;
26082608
26092609
} while (position < NUM_BUFFERS);
26102610
26112611
2612-
#endif
2612+
#endif */
26132613

26142614
position = 0;
26152615

driver/others/openblas_get_config.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4242
#endif
4343

4444
static char* openblas_config_str=""
45+
"OpenBLAS "
46+
VERSION
47+
" "
4548
#ifdef USE64BITINT
46-
"USE64BITINT "
49+
" USE64BITINT "
4750
#endif
4851
#ifdef NO_CBLAS
4952
"NO_CBLAS "

kernel/mips64/KERNEL

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
CAXPYKERNEL = ../mips/zaxpy.c
22
ZAXPYKERNEL = ../mips/zaxpy.c
3-
SROTKERNEL = ../mips/rot.c
4-
DROTKERNEL = ../mips/rot.c
5-
CROTKERNEL = ../mips/zrot.c
6-
ZROTKERNEL = ../mips/zrot.c
3+
SROTKERNEL = ../mips/rot.c
4+
DROTKERNEL = ../mips/rot.c
5+
CROTKERNEL = ../mips/zrot.c
6+
ZROTKERNEL = ../mips/zrot.c
77
CSWAPKERNEL = ../mips/zswap.c
88
ZSWAPKERNEL = ../mips/zswap.c
9-
9+
10+
1011
ifndef SNRM2KERNEL
1112
SNRM2KERNEL = snrm2.S
1213
endif

0 commit comments

Comments
 (0)