Skip to content

Commit dea6114

Browse files
authored
Merge branch 'OpenMathLib:develop' into develop
2 parents bdf093e + d96daa2 commit dea6114

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

80 files changed

+3839
-529
lines changed

CMakeLists.txt

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ project(OpenBLAS C ASM)
99

1010
set(OpenBLAS_MAJOR_VERSION 0)
1111
set(OpenBLAS_MINOR_VERSION 3)
12-
set(OpenBLAS_PATCH_VERSION 29.dev)
12+
set(OpenBLAS_PATCH_VERSION 30.dev)
1313

1414
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
1515

@@ -152,6 +152,9 @@ endif ()
152152
if (NOT DEFINED BUILD_BFLOAT16)
153153
set (BUILD_BFLOAT16 false)
154154
endif ()
155+
if (NOT DEFINED BUILD_HFLOAT16)
156+
set (BUILD_HFLOAT16 false)
157+
endif ()
155158
# set which float types we want to build for
156159
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
157160
# if none are defined, build for all
@@ -305,8 +308,8 @@ if (USE_OPENMP)
305308
endif()
306309
endif()
307310

308-
# Fix "Argument list too long" for macOS with Intel CPUs and DYNAMIC_ARCH turned on
309-
if(APPLE AND DYNAMIC_ARCH AND (NOT CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "arm64"))
311+
# Fix "Argument list too long" for macOS with POWERPC or Intel CPUs
312+
if(APPLE AND (NOT CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "arm64"))
310313
# Use response files
311314
set(CMAKE_C_USE_RESPONSE_FILE_FOR_OBJECTS 1)
312315
# Always build static library first
@@ -541,13 +544,13 @@ message(STATUS "adding postbuild instruction to rename syms")
541544
if (NOT USE_PERL)
542545
add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD
543546
COMMAND sh ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def
544-
COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/${OpenBLAS_LIBNAME}.so
547+
COMMAND objcopy --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/${OpenBLAS_LIBNAME}.so
545548
COMMENT "renaming symbols"
546549
)
547550
else()
548551
add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD
549552
COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol.pl "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def
550-
COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so
553+
COMMAND objcopy --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so
551554
COMMENT "renaming symbols"
552555
)
553556
endif()

Changelog.txt

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,138 @@
11
OpenBLAS ChangeLog
2+
====================================================================
3+
Version 0.3.30
4+
19-Jun-2025
5+
6+
general:
7+
- fixed an installation problem with the thread safety test in gmake builds
8+
- fixed spurious overwriting of an input array in complex GEMMT/GEMMTR
9+
- fixed naming of GEMMTR in error messages from XERBLA
10+
- fixed compilation of SBGEMMT/SBGEMMTR in CMake builds
11+
- fixed the implementation of ?NRM2 to handle INCX=0 correctly
12+
- removed tests for CSROT and ZDROT that relied on unspecified behavior
13+
- fixed a performance regression in multithreaded GEMM that was particularly
14+
serious on POWER targets
15+
- fixed linking issues when using LLVM's flang-new with gmake
16+
- fixed a potential thread safety problem with C11 atomic operations
17+
- further improved the workload partitioning in parallel GEMM
18+
- fixed omission of LAPACKE interfaces for CGESVDQ,CTRSYL3 and ?GEQPF in
19+
CMake builds
20+
- fixed mishandling of setting NO_LAPACK to FALSE, and incorrect dependencies
21+
for LAPACK function SPMV in CMake builds
22+
- added explicit CMake options for building LAPACKE and shared libraries
23+
- simplified and improved handling of OpenMP options in CMake builds
24+
- reworked Windows DLL generation in CMake builds to ensure correct symbol
25+
renaming (pre/postfixing) and optional generation of PDB files for debugging
26+
- updated the Perl script version of the gensymbol utility for use with
27+
Windows-on-Arm
28+
- fixed building with (MinGW) gmake on Windows to ensure completeness of the
29+
LAPACK included in the static library (potential race condition due to the
30+
Windows version of the "ln" utility creating snapshot copies rather than links)
31+
- fixed unwanted deletion of the lapacke_mangling.h file by "make clean"
32+
- fixed potential duplication of a _64 suffix on library names in CMake builds
33+
- fixed compilation of the C fallback copies of the LAPACK code with GCC 15
34+
- included fixes from the Reference-LAPACK project:
35+
- fixed a truncated error message in the EIG part of the testsuite
36+
(Reference-LAPACK PR 1119)
37+
- fixed too strict check in LAPACKE_?gesdd_work (PR #1126)
38+
- fixed memory corruption when calling ?GEEV with non-finite data (PR #1128)
39+
- fixed missing initialization of a variable in C/GEQP3RK (PR #1131)
40+
- fixed 2nd dimension chosen in C/ZUNMLQ transposition operation (PR #1135)
41+
42+
x86_64:
43+
- fixed an error in the SBGEMV kernel for Cooper Lake/Sapphire Rapids
44+
- fixed corner cases of NAN and INF input handling in CSCAL and ZSCAL
45+
- improved the compiler identification code for flang-new
46+
- fixed a potential build issue in the ZSUM kernel
47+
- fixed "argument list too long" errors when building on MacOS
48+
- added cpu autodetection support for several new Arrow Lake models
49+
- fixed conditional inclusion of the fast path SGEMM kernel in DYNAMIC_ARCH
50+
- fixed compilation with the MinGW build of GCC 15
51+
52+
arm64:
53+
- fixed cpu type detection of A64FX and some ThunderX models (broken in 0.3.29)
54+
- added support for the AmpereOne/1A cpus in DYNAMIC_ARCH builds
55+
- added an optimized SBGEMM kernel for NEOVERSEV1
56+
- improved 1xN SBGEMM performance by forwarding to SBGEMV
57+
- introduced a stepwise increase of the thread count used for
58+
SGEMM and SGEMV on NEOVERSEV1/V2 in relation to problem size
59+
- introduced a stepwise increase of the thread count used for
60+
DGEMV on NEOVERSEV1 in relation to problem size
61+
- introduced a stepwise increase of the thread count used for
62+
SDOT and DDOT on NEOVERSEV1 in relation to problem size
63+
- worked around assembler limitations in LLVM for Windows-on-Arm
64+
- enabled cpu type autodetection from the registry on Windows-on-Arm
65+
- improved multithreading threshold for GEMV and GESV on Windows-on-Arm
66+
- fixed overoptimization issues with LLVM's flang in Windows-on-Arm
67+
- fixed corner cases of NAN and INF input handling in CSCAL and ZSCAL
68+
- added a fast path SGEMM kernel for small workloads on SME capable targets
69+
- improved performance of SGEMM and DGEMM kernels for small workloads
70+
- improved performance of SGEMV and DGEMV on SVE-capable targets
71+
- improved performance of SGEMV on NEOVERSEN1 and Apple M
72+
- added optimized SSYMV and DSYMV kernels for NEOVERSEN1, Apple M and all
73+
SVE capable targets
74+
- added optimized SBGEMV kernels for NEOVERSEV1/V2/N2
75+
- improved performance of SGEMM through faster NCOPY kernels
76+
- added compiler options for the NVIDIA HPC Compiler Suite
77+
- fixed compilation on OSX with XCode 16.3 and later
78+
- fixed cpu core type and cache size detection on Apple M4
79+
- updated GEMM parameter settings for Neoverse cpus in cross-builds with CMake
80+
- fixed default compiler options for NEOVERSEN1 and CORTEXX2 in CMake builds
81+
- fixed conditional inclusion of the fast path SGEMM kernel in DYNAMIC_ARCH
82+
- fixed potential miscompilation of the non-SVE SDOT kernel
83+
84+
riscv64:
85+
- added optimized SROTM and DROTM kernels for x280
86+
- fixed corner cases of NAN and INF input handling in CSCAL and ZSCAL
87+
- improved performance of GEMM_TCOPY on RVV1.0 targets with
88+
VLEN of 128 or 256
89+
- improved performance of OMATCOPY on targets with VLEN 256
90+
- greatly improved performance of SGEMV/DGEMV
91+
- improved performance of CGEMV and ZGEMV on C910V and all RVV targets
92+
with VLEN 256
93+
- improved performance of SAXPBY and DAXPBY on C910V and all RVV targets
94+
with VLEN 256
95+
- improved performance of AXPY and DOT on C910V and ZVL256B targets by
96+
falling back to non-vectorized code for very small N. (Thereby fixing
97+
poor performance of CHBMV/ZHBMV for very small K)
98+
- fixed CMake build failures of the TRMM kernels
99+
100+
loongarch64:
101+
- improved performance of the LSX versions of SSYMV/DSYMV
102+
- made the LASX versions of the DSYMV and SSYMV kernels
103+
compatible with hardware changes in LA664 and future targets
104+
- fixed inaccuracies in several LASX kernels
105+
- improved compatibility of LSX kernels with LA264 targets
106+
- fixed handling of deprecated target names in CMake builds
107+
- fixed corner cases of NAN and INF input handling in CSCAL and ZSCAL
108+
109+
power:
110+
- fixed building for PPCG4 with CMake
111+
- fixed SSCAL/DSCAL on PPC970 running FreeBSD
112+
- fixed a potential alignment issue in the POWER8 SGEMV kernel
113+
- fixed corner cases of NAN and INF input handling in CSCAL and ZSCAL
114+
115+
zarch:
116+
- fixed corner cases of NAN and INF input handling in CSCAL and ZSCAL
117+
- fixed unwanted generation of object files with a writable stack
118+
119+
x86:
120+
- fixed corner cases of NAN and INF input handling in CSCAL and ZSCAL
121+
- worked around potential miscompilation of CDOT with very old binutils
122+
123+
arm:
124+
- fixed corner cases of NAN and INF input handling in CSCAL and ZSCAL
125+
- fixed unwanted generation of object files with a writable stack
126+
127+
sparc:
128+
- fixed corner cases of NAN and INF input handling in CSCAL and ZSCAL
129+
130+
alpha:
131+
- fixed build failure caused by spurious Windows-only typecasts
132+
133+
cell:
134+
- fixed probable build issue caused by spurious Windows-only typecasts
135+
2136
====================================================================
3137
Version 0.3.29
4138
12-Jan-2025

Makefile.arm64

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,16 @@ endif
191191
endif
192192
endif
193193

194+
# Detect Ampere AmpereOne(ampere1,ampere1a) processors.
195+
ifeq ($(CORE), AMPERE1)
196+
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG)))
197+
CCOMMON_OPT += -march=armv8.6-a+crypto+crc+fp16+sha3+rng
198+
ifneq ($(F_COMPILER), NAG)
199+
FCOMMON_OPT += -march=armv8.6-a+crypto+crc+fp16+sha3+rng
200+
endif
201+
endif
202+
endif
203+
194204
# Use a53 tunings because a55 is only available in GCC>=8.1
195205
ifeq ($(CORE), CORTEXA55)
196206
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))

Makefile.power

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,16 @@ ifeq ($(CORE), POWER10)
1313
ifneq ($(C_COMPILER), PGI)
1414
ifeq ($(C_COMPILER), GCC)
1515
ifeq ($(GCCVERSIONGTEQ10), 1)
16-
CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
16+
CCOMMON_OPT += -O3 -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
1717
else ifneq ($(GCCVERSIONGT4), 1)
1818
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
19-
CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
19+
CCOMMON_OPT += -O3 -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
2020
else
2121
$(warning your compiler is too old to fully support POWER10, getting a newer version of gcc is recommended)
22-
CCOMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -fno-fast-math
22+
CCOMMON_OPT += -O3 -mcpu=power9 -mtune=power9 -mvsx -fno-fast-math
2323
endif
2424
else
25-
CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
25+
CCOMMON_OPT += -O3 -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
2626
endif
2727
ifeq ($(F_COMPILER), IBM)
2828
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr10 -qtune=pwr10 -qfloat=nomaf -qzerosize
@@ -34,7 +34,7 @@ endif
3434

3535
ifeq ($(CORE), POWER9)
3636
ifneq ($(C_COMPILER), PGI)
37-
CCOMMON_OPT += -Ofast -mvsx -fno-fast-math
37+
CCOMMON_OPT += -O3 -mvsx -fno-fast-math
3838
ifeq ($(C_COMPILER), GCC)
3939
ifneq ($(GCCVERSIONGT4), 1)
4040
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
@@ -70,7 +70,7 @@ endif
7070

7171
ifeq ($(CORE), POWER8)
7272
ifneq ($(C_COMPILER), PGI)
73-
CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
73+
CCOMMON_OPT += -O3 -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
7474
else
7575
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
7676
endif

Makefile.prebuild

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,11 @@ TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
6464
endif
6565

6666
ifeq ($(TARGET), RISCV64_ZVL256B)
67-
TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d
67+
TARGET_FLAGS = -march=rv64imafdcv_zvfh_zfh -mabi=lp64d
6868
endif
6969

7070
ifeq ($(TARGET), RISCV64_ZVL128B)
71-
TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d
71+
TARGET_FLAGS = -march=rv64imafdcv_zvfh_zfh -mabi=lp64d
7272
endif
7373

7474
ifeq ($(TARGET), RISCV64_GENERIC)

Makefile.riscv64

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@ CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl512b -mabi=lp64d
77
FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
88
endif
99
ifeq ($(CORE), RISCV64_ZVL256B)
10-
CCOMMON_OPT += -march=rv64imafdcv_zvl256b -mabi=lp64d
11-
FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d
10+
CCOMMON_OPT += -march=rv64imafdcv_zvl256b_zvfh_zfh -mabi=lp64d
11+
FCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d
1212
endif
1313
ifeq ($(CORE), RISCV64_ZVL128B)
14-
CCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d
15-
FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d
14+
CCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d
15+
FCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d
1616
endif
1717
ifeq ($(CORE), RISCV64_GENERIC)
1818
CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d

Makefile.rule

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44

55
# This library's version
6-
VERSION = 0.3.29.dev
6+
VERSION = 0.3.30.dev
77

88
# If you set this prefix, the library name will be lib$(LIBNAMESUFFIX)openblas.a
99
# and lib$(LIBNAMESUFFIX)openblas.so, with a matching soname in the shared library
@@ -308,6 +308,8 @@ COMMON_PROF = -pg
308308
# If you want to enable the experimental BFLOAT16 support
309309
# BUILD_BFLOAT16 = 1
310310

311+
# If you want to enable the experimental HFLOAT16 support
312+
# BUILD_HFLOAT16 = 1
311313

312314
# Set the thread number threshold beyond which the job array for the threaded level3 BLAS
313315
# will be allocated on the heap rather than the stack. (This array alone requires

Makefile.system

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,8 @@ GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
393393
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10)
394394
GCCVERSIONGTEQ11 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 11)
395395
GCCVERSIONGTEQ12 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 12)
396+
GCCVERSIONGTEQ13 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 13)
397+
GCCVERSIONGTEQ14 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 14)
396398
# Note that the behavior of -dumpversion is compile-time-configurable for
397399
# gcc-7.x and newer. Use -dumpfullversion there
398400
ifeq ($(GCCVERSIONGTEQ7),1)
@@ -1191,6 +1193,13 @@ endif
11911193
else ifeq ($(ARCH), $(filter $(ARCH),mips))
11921194
FCOMMON_OPT += -mabi=32
11931195
endif
1196+
ifeq ($(ARCH), $(filter $(ARCH),loongarch64))
1197+
ifdef INTERFACE64
1198+
ifneq ($(INTERFACE64), 0)
1199+
FCOMMON_OPT += -fdefault-integer-8
1200+
endif
1201+
endif
1202+
endif
11941203
else
11951204
ifdef BINARY64
11961205
ifneq ($(OSNAME), AIX)
@@ -1547,6 +1556,9 @@ endif
15471556
ifeq ($(BUILD_BFLOAT16), 1)
15481557
CCOMMON_OPT += -DBUILD_BFLOAT16
15491558
endif
1559+
ifeq ($(BUILD_HFLOAT16), 1)
1560+
CCOMMON_OPT += -DBUILD_HFLOAT16
1561+
endif
15501562
ifeq ($(BUILD_SINGLE), 1)
15511563
CCOMMON_OPT += -DBUILD_SINGLE=1
15521564
endif
@@ -1889,11 +1901,14 @@ export TARGET_CORE
18891901
export NO_AVX512
18901902
export NO_AVX2
18911903
export BUILD_BFLOAT16
1904+
export BUILD_HFLOAT16
18921905
export NO_LSX
18931906
export NO_LASX
18941907

18951908
export SBGEMM_UNROLL_M
18961909
export SBGEMM_UNROLL_N
1910+
export SHGEMM_UNROLL_M
1911+
export SHGEMM_UNROLL_N
18971912
export SGEMM_UNROLL_M
18981913
export SGEMM_UNROLL_N
18991914
export DGEMM_UNROLL_M

Makefile.tail

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
SBBLASOBJS_P = $(SBBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
2+
# PSUFFIX (profiled) counterparts of the HFLOAT16 objects; the name must be SHBLASOBJS_P so the target-specific CFLAGS rules in this file apply to them.
SHBLASOBJS_P = $(SHBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
23
SBLASOBJS_P = $(SBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
34
DBLASOBJS_P = $(DBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
45
QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
@@ -11,8 +12,8 @@ COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX))
1112

1213
HPLOBJS_P = $(HPLOBJS:.$(SUFFIX)=.$(PSUFFIX))
1314

14-
BLASOBJS = $(SBEXTOBJS) $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) $(CBAUXOBJS)
15-
BLASOBJS_P = $(SBEXTOBJS_P) $(SBBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P) $(CBAUXOBJS_P)
15+
BLASOBJS = $(SHBLASOBJS) $(SBEXTOBJS) $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) $(CBAUXOBJS)
16+
BLASOBJS_P = $(SHBLASOBJS_P) $(SBEXTOBJS_P) $(SBBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P) $(CBAUXOBJS_P)
1617

1718
ifdef EXPRECISION
1819
BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
@@ -24,6 +25,7 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
2425
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
2526
endif
2627

28+
$(SHBLASOBJS) $(SHBLASOBJS_P) : override CFLAGS += -DHFLOAT16 -UDOUBLE -UCOMPLEX
2729
$(SBBLASOBJS) $(SBBLASOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX
2830
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
2931
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
@@ -33,6 +35,7 @@ $(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
3335
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
3436
$(SBEXTOBJS) $(SBEXTOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX
3537

38+
$(SHBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
3639
$(SBBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
3740
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
3841
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)

0 commit comments

Comments
 (0)