Skip to content

Commit 2c552f1

Browse files
authored
Change "HALF" and "sh" to "BFLOAT16" and "sb"
1 parent 7ae9e89 commit 2c552f1

File tree

4 files changed

+48
-36
lines changed

4 files changed

+48
-36
lines changed

CMakeLists.txt

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,8 @@ option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding proc
2929
else()
3030
set(NO_AFFINITY 1)
3131
endif()
32-
option(BUILD_SINGLE "Single precision" OFF)
33-
option(BUILD_DOUBLE "Double precision" OFF)
34-
option(BUILD_COMPLEX "Single precision" OFF)
35-
option(BUILD_COMPLEX16 "Single precision" OFF)
32+
option(CPP_THREAD_SAFETY_TEST "Run a massively parallel DGEMM test to confirm thread safety of the library (requires OpenMP and about 1.3GB of RAM)" OFF)
33+
option(CPP_THREAD_SAFETY_GEMV "Run a massively parallel DGEMV test to confirm thread safety of the library (requires OpenMP)" OFF)
3634

3735
# Add a prefix or suffix to all exported symbol names in the shared library.
3836
# Avoids conflicts with other BLAS libraries, especially when using
@@ -91,13 +89,13 @@ if (NOT NO_LAPACK)
9189
list(APPEND SUBDIRS lapack)
9290
endif ()
9391

94-
if (NOT DEFINED BUILD_HALF)
95-
set (BUILD_HALF false)
92+
if (NOT DEFINED BUILD_BFLOAT16)
93+
set (BUILD_BFLOAT16 false)
9694
endif ()
9795
# set which float types we want to build for
9896
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
9997
# if none are defined, build for all
100-
# set(BUILD_HALF true)
98+
# set(BUILD_BFLOAT16 true)
10199
set(BUILD_SINGLE true)
102100
set(BUILD_DOUBLE true)
103101
set(BUILD_COMPLEX true)
@@ -110,33 +108,28 @@ endif()
110108

111109
set(FLOAT_TYPES "")
112110
if (BUILD_SINGLE)
113-
message(STATUS "Building Songle Precision")
114-
list(APPEND FLOAT_TYPES "SINGLE")
115-
# set(CCOMMON_OPT "${CCOMMON_OPT} -DBUILD_SINGLE=1")
111+
message(STATUS "Building Single Precision")
112+
list(APPEND FLOAT_TYPES "SINGLE") # defines nothing
116113
endif ()
117114

118115
if (BUILD_DOUBLE)
119116
message(STATUS "Building Double Precision")
120-
list(APPEND FLOAT_TYPES "DOUBLE")
121-
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_DOUBLE=1")
117+
list(APPEND FLOAT_TYPES "DOUBLE") # defines DOUBLE
122118
endif ()
123119

124120
if (BUILD_COMPLEX)
125121
message(STATUS "Building Complex Precision")
126-
list(APPEND FLOAT_TYPES "COMPLEX")
127-
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX=1")
128-
endif ()
122+
list(APPEND FLOAT_TYPES "COMPLEX") # defines COMPLEX
123+
endif ()
129124

130125
if (BUILD_COMPLEX16)
131126
message(STATUS "Building Double Complex Precision")
132-
list(APPEND FLOAT_TYPES "ZCOMPLEX")
133-
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX16=1")
127+
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
134128
endif ()
135129

136-
if (BUILD_HALF)
130+
if (BUILD_BFLOAT16)
137131
message(STATUS "Building Half Precision")
138-
list(APPEND FLOAT_TYPES "HALF")
139-
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_HALF")
132+
list(APPEND FLOAT_TYPES "BFLOAT16") # defines nothing
140133
endif ()
141134

142135
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
@@ -243,6 +236,9 @@ if (NOT MSVC AND NOT NOFORTRAN)
243236
add_subdirectory(ctest)
244237
endif()
245238
add_subdirectory(lapack-netlib/TESTING)
239+
if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV)
240+
add_subdirectory(cpp_thread_test)
241+
endif()
246242
endif()
247243

248244
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES

Makefile.rule

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -272,17 +272,33 @@ COMMON_PROF = -pg
272272
# work at all.
273273
#
274274
# CPP_THREAD_SAFETY_TEST = 1
275+
#
276+
# use this to run only the less memory-hungry GEMV test
277+
# CPP_THREAD_SAFETY_GEMV = 1
275278

276279

277280
# If you want to enable the experimental BFLOAT16 support
278-
# BUILD_HALF = 1
279-
#
280-
# Select if you need to build only select types
281-
# BUILD_SINGLE = 1
282-
# BUILD_DOUBLE = 1
283-
# BUILD_COMPLEX = 1
284-
# BUILD_COMPLEX16 = 1
285-
#
286-
#
281+
# BUILD_BFLOAT16 = 1
282+
283+
284+
# Set the thread number threshold beyond which the job array for the threaded level3 BLAS
285+
# will be allocated on the heap rather than the stack. (This array alone requires
286+
# NUM_THREADS*NUM_THREADS*128 bytes of memory so should not pose a problem at low cpu
287+
# counts, but obviously it is not the only item that ends up on the stack.)
288+
# The default value of 32 ensures that the overall requirement is compatible
289+
# with the default 1MB stacksize imposed by having the Java VM loaded without use
290+
# of its -Xss parameter.
291+
# The value of 160 formerly used from about version 0.2.7 until 0.3.10 is easily compatible
292+
# with the common Linux stacksize of 8MB but will cause crashes with unwary use of the Java
293+
# VM e.g. in Octave or with the Java-based libhdfs in numpy or scipy code
294+
# BLAS3_MEM_ALLOC_THRESHOLD = 160
295+
296+
297+
298+
# The settings below are not yet configurable here; use cmake if you need to build only select types
299+
BUILD_SINGLE = 1
300+
BUILD_DOUBLE = 1
301+
BUILD_COMPLEX = 1
302+
BUILD_COMPLEX16 = 1
287303
# End of user configuration
288304
#

Makefile.system

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1232,8 +1232,8 @@ ifeq ($(USE_TLS), 1)
12321232
CCOMMON_OPT += -DUSE_TLS
12331233
endif
12341234

1235-
ifeq ($(BUILD_HALF), 1)
1236-
CCOMMON_OPT += -DBUILD_HALF
1235+
ifeq ($(BUILD_BFLOAT16), 1)
1236+
CCOMMON_OPT += -DBUILD_BFLOAT16
12371237
endif
12381238
ifeq ($(BUILD_SINGLE), 1)
12391239
CCOMMON_OPT += -DBUILD_SINGLE=1
@@ -1521,10 +1521,10 @@ export KERNELDIR
15211521
export FUNCTION_PROFILE
15221522
export TARGET_CORE
15231523
export NO_AVX512
1524-
export BUILD_HALF
1524+
export BUILD_BFLOAT16
15251525

1526-
export SHGEMM_UNROLL_M
1527-
export SHGEMM_UNROLL_N
1526+
export SBGEMM_UNROLL_M
1527+
export SBGEMM_UNROLL_N
15281528
export SGEMM_UNROLL_M
15291529
export SGEMM_UNROLL_N
15301530
export DGEMM_UNROLL_M

Makefile.tail

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,14 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
2424
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
2525
endif
2626

27-
$(SHBLASOBJS) $(SHBLASOBJS_P) : override CFLAGS += -DHALF -UDOUBLE -UCOMPLEX
27+
$(SHBLASOBJS) $(SHBLASOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX
2828
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
2929
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
3030
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
3131
$(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX
3232
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
3333
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
34-
$(SHEXTOBJS) $(SHEXTOBJS_P) : override CFLAGS += -DHALF -UDOUBLE -UCOMPLEX
34+
$(SHEXTOBJS) $(SHEXTOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX
3535

3636
$(SHBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
3737
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)

0 commit comments

Comments
 (0)