Skip to content

Commit 947fda6

Browse files
authored
Merge branch 'OpenMathLib:develop' into forklock
2 parents a7c0960 + 0c59ae0 commit 947fda6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+1693
-167
lines changed

.cirrus.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ task:
127127
FreeBSD_task:
128128
name: FreeBSD-gcc
129129
freebsd_instance:
130-
image_family: freebsd-14-2
130+
image_family: freebsd-14-3
131131
install_script:
132132
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc
133133
compile_script:
@@ -138,7 +138,7 @@ FreeBSD_task:
138138
FreeBSD_task:
139139
name: freebsd-gcc-ilp64
140140
freebsd_instance:
141-
image_family: freebsd-14-2
141+
image_family: freebsd-14-3
142142
install_script:
143143
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc
144144
compile_script:
@@ -148,7 +148,7 @@ FreeBSD_task:
148148
FreeBSD_task:
149149
name: FreeBSD-clang-openmp
150150
freebsd_instance:
151-
image_family: freebsd-14-2
151+
image_family: freebsd-14-3
152152
install_script:
153153
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc
154154
- ln -s /usr/local/lib/gcc13/libgfortran.so.5.0.0 /usr/lib/libgfortran.so

.github/workflows/riscv64_vector.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ jobs:
2626
opts: TARGET=RISCV64_ZVL128B BINARY=64 ARCH=riscv64
2727
qemu_cpu: rv64,g=true,c=true,v=true,vext_spec=v1.0,vlen=128,elen=64
2828
- target: RISCV64_ZVL256B
29-
opts: TARGET=RISCV64_ZVL256B BINARY=64 ARCH=riscv64
29+
opts: TARGET=RISCV64_ZVL256B BINARY=64 ARCH=riscv64 BUILD_BFLOAT16=1 BUILD_HFLOAT16=1
3030
qemu_cpu: rv64,g=true,c=true,v=true,vext_spec=v1.0,vlen=256,elen=64
3131
- target: DYNAMIC_ARCH=1
3232
opts: TARGET=RISCV64_GENERIC BINARY=64 ARCH=riscv64 DYNAMIC_ARCH=1
@@ -40,7 +40,7 @@ jobs:
4040
run: |
4141
sudo apt-get update
4242
sudo apt-get install autoconf automake autotools-dev ninja-build make \
43-
libgomp1-riscv64-cross ccache
43+
libgomp1-riscv64-cross ccache qemu-kvm
4444
wget ${riscv_gnu_toolchain}/${riscv_gnu_toolchain_nightly_download_path}
4545
tar -xvf $(basename ${riscv_gnu_toolchain_nightly_download_path}) -C /opt
4646

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ test/SBLAT3_3M.SUMM
8080
test/ZBLAT2.SUMM
8181
test/ZBLAT3.SUMM
8282
test/ZBLAT3_3M.SUMM
83+
test/SHBLAT2.SUMM
8384
test/SHBLAT3.SUMM
8485
test/SBBLAT2.SUMM
8586
test/SBBLAT3.SUMM
@@ -98,6 +99,7 @@ test/sblat2
9899
test/sblat3
99100
test/sblat3_3m
100101
test/test_shgemm
102+
test/test_shgemv
101103
test/test_sbgemm
102104
test/test_sbgemv
103105
test/test_bgemm

azure-pipelines.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ jobs:
9595
vmImage: 'windows-latest'
9696
steps:
9797
- script: |
98-
mingw32-make CC=gcc FC=gfortran DYNAMIC_ARCH=1 DYNAMIC_LIST="SANDYBRIDGE"
98+
mingw32-make CC=gcc NOLAPACK=1 DYNAMIC_ARCH=1 DYNAMIC_LIST="SANDYBRIDGE"
9999
100100
- job: Windows_clang_cmake
101101
pool:
@@ -201,7 +201,7 @@ jobs:
201201
- script: |
202202
brew update
203203
brew install llvm libomp
204-
make TARGET=CORE2 USE_OPENMP=1 DYNAMIC_ARCH=1 CC=/usr/local/opt/llvm/bin/clang NOFORTRAN=1
204+
make TARGET=CORE2 USE_OPENMP=1 DYNAMIC_ARCH=1 DYNAMIC_LIST='NEHALEM HASWELL SKYLAKEX' CC=/usr/local/opt/llvm/bin/clang NOFORTRAN=1
205205
206206
- job: OSX_OpenMP_Clang_cmake
207207
pool:
@@ -215,7 +215,7 @@ jobs:
215215
brew install llvm libomp
216216
mkdir build
217217
cd build
218-
cmake -DTARGET=CORE2 -DUSE_OPENMP=1 -DINTERFACE64=1 -DDYNAMIC_ARCH=1 -DCMAKE_C_COMPILER=/usr/local/opt/llvm/bin/clang -DNOFORTRAN=1 -DNO_AVX512=1 ..
218+
cmake -DTARGET=CORE2 -DUSE_OPENMP=1 -DINTERFACE64=1 -DDYNAMIC_ARCH=1 -DDYNAMIC_LIST='NEHALEM HASWELL SKYLAKEX' -DCMAKE_C_COMPILER=/usr/local/opt/llvm/bin/clang -DNOFORTRAN=1 -DNO_AVX512=1 ..
219219
make
220220
ctest
221221

cmake/cc.cmake

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ endif ()
213213

214214
if (${CORE} STREQUAL A64FX)
215215
if (NOT DYNAMIC_ARCH)
216-
if (${CMAKE_C_COMPILER_ID} STREQUAL "NVC" AND NOT NO_SVE)
216+
if (${CMAKE_C_COMPILER_ID} STREQUAL "NVHPC" AND NOT NO_SVE)
217217
set (CCOMMON_OPT "${CCOMMON_OPT} -tp=a64fx")
218218
elseif (${GCC_VERSION} VERSION_GREATER 11.0 OR ${GCC_VERSION} VERSION_EQUAL 11.0)
219219
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve -mtune=a64fx")
@@ -227,7 +227,7 @@ if (${CORE} STREQUAL NEOVERSEV2)
227227
if (NOT DYNAMIC_ARCH)
228228
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
229229
set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-v2")
230-
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "NVC" AND NOT NO_SVE)
230+
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "NVHPC" AND NOT NO_SVE)
231231
set (CCOMMON_OPT "${CCOMMON_OPT} -tp=neoverse-v2")
232232
else ()
233233
if (${GCC_VERSION} VERSION_GREATER 13.0 OR ${GCC_VERSION} VERSION_EQUAL 13.0)
@@ -245,7 +245,7 @@ if (${CORE} STREQUAL NEOVERSEN2)
245245
if (NOT DYNAMIC_ARCH)
246246
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
247247
set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
248-
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "NVC" AND NOT NO_SVE)
248+
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "NVHPC" AND NOT NO_SVE)
249249
set (CCOMMON_OPT "${CCOMMON_OPT} -tp=neoverse-v2")
250250
else ()
251251
if (${GCC_VERSION} VERSION_GREATER 11.1 OR ${GCC_VERSION} VERSION_EQUAL 11.1)
@@ -261,7 +261,7 @@ if (${CORE} STREQUAL NEOVERSEV1)
261261
if (NOT DYNAMIC_ARCH)
262262
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
263263
set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8.4-a+sve+bf16 -mtune=neoverse-v1")
264-
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "NVC" AND NOT NO_SVE)
264+
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "NVHPC" AND NOT NO_SVE)
265265
set (CCOMMON_OPT "${CCOMMON_OPT} -tp=neoverse-v1")
266266
else ()
267267
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
@@ -275,7 +275,7 @@ endif ()
275275

276276
if (${CORE} STREQUAL NEOVERSEN1)
277277
if (NOT DYNAMIC_ARCH)
278-
if (${CMAKE_C_COMPILER_ID} STREQUAL "NVC" AND NOT NO_SVE)
278+
if (${CMAKE_C_COMPILER_ID} STREQUAL "NVHPC" AND NOT NO_SVE)
279279
set (CCOMMON_OPT "${CCOMMON_OPT} -tp=neoverse-n1")
280280
elseif (${GCC_VERSION} VERSION_GREATER 9.4 OR ${GCC_VERSION} VERSION_EQUAL 9.4)
281281
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a -mtune=neoverse-n1")
@@ -287,7 +287,7 @@ endif ()
287287

288288
if (${CORE} STREQUAL AMPEREONE)
289289
if (NOT DYNAMIC_ARCH)
290-
if (${CMAKE_C_COMPILER_ID} STREQUAL "NVC")
290+
if (${CMAKE_C_COMPILER_ID} STREQUAL "NVHPC")
291291
set (CCOMMON_OPT "${CCOMMON_OPT} -tp=neoverse-n1")
292292
elseif (${GCC_VERSION} VERSION_GREATER 12.1)
293293
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.6-a+crypto+crc+fp16+sha3+rng -mtune=ampereone")
@@ -301,7 +301,7 @@ if (${CORE} STREQUAL ARMV8SVE)
301301
if (NOT DYNAMIC_ARCH)
302302
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
303303
set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8-a+sve")
304-
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "NVC" AND NOT NO_SVE)
304+
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "NVHPC" AND NOT NO_SVE)
305305
set (CCOMMON_OPT "${CCOMMON_OPT} -tp=host")
306306
else ()
307307
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
@@ -311,7 +311,7 @@ endif ()
311311

312312
if (${CORE} STREQUAL ARMV9SME)
313313
if (NOT DYNAMIC_ARCH)
314-
if (${CMAKE_C_COMPILER_ID} STREQUAL "NVC" AND NOT NO_SVE)
314+
if (${CMAKE_C_COMPILER_ID} STREQUAL "NVHPC" AND NOT NO_SVE)
315315
set (CCOMMON_OPT "${CCOMMON_OPT} -tp=host")
316316
else ()
317317
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv9-a+sme")
@@ -391,6 +391,30 @@ if (${CORE} STREQUAL PPCG4)
391391
endif ()
392392
endif ()
393393

394+
395+
if ((${CORE} STREQUAL RISCV64_ZVL128B) OR (${CORE} STREQUAL RISCV64_ZVL256B))
396+
set (RISCV64_OPT "rv64imafdcv")
397+
if (BUILD_BFLOAT16)
398+
set (RISCV64_OPT "${RISCV64_OPT}_zvfbfwma")
399+
endif()
400+
if (BUILD_HFLOAT16)
401+
set (RISCV64_OPT "${RISCV64_OPT}_zvfh_zfh")
402+
endif()
403+
if (${CORE} STREQUAL RISCV64_ZVL256B)
404+
set (CCOMMON_OPT "${CCOMMON_OPT} -march=${RISCV64_OPT}_zvl256b -mabi=lp64d")
405+
endif()
406+
if (${CORE} STREQUAL RISCV64_ZVL128B)
407+
set (CCOMMON_OPT "${CCOMMON_OPT} -march=${RISCV64_OPT}_zvl128b -mabi=lp64d")
408+
endif()
409+
endif()
410+
if (${CORE} STREQUAL RISCV64_GENERIC)
411+
set (CCOMMON_OPT "${CCOMMON_OPT} -march=rv64imafdc -mabi=lp64d")
412+
endif()
413+
if (${CORE} STREQUAL x280)
414+
set (CCOMMON_OPT "${CCOMMON_OPT} -march=rv64imafdcv_zba_zbb_zfh_zvl512b -mabi=lp64d")
415+
endif()
416+
417+
394418
if (NOT DYNAMIC_ARCH)
395419
if (HAVE_AVX2)
396420
set (CCOMMON_OPT "${CCOMMON_OPT} -mavx2")

cmake/kernel.cmake

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,10 @@ if (BUILD_BFLOAT16)
175175
SetFallback(SBGEMVNKERNEL ../x86_64/sbgemv_n.c)
176176
SetFallback(SBGEMVTKERNEL ../x86_64/sbgemv_t.c)
177177
endif ()
178+
if (BUILD_HFLOAT16)
179+
SetFallback(SHGEMVNKERNEL ../generic/gemv_n.c)
180+
SetFallback(SHGEMVTKERNEL ../generic/gemv_t.c)
181+
endif ()
178182
endmacro ()
179183

180184
macro(SetDefaultL2)
@@ -228,6 +232,10 @@ if (BUILD_BFLOAT16)
228232
SetFallback(BGEMVTKERNEL ../generic/gemv_t.c)
229233
SetFallback(SBGEMVNKERNEL ../x86_64/sbgemv_n.c)
230234
SetFallback(SBGEMVTKERNEL ../x86_64/sbgemv_t.c)
235+
endif ()
236+
if (BUILD_HFLOAT16)
237+
SetFallback(SHGEMVNKERNEL ../generic/gemv_n.c)
238+
SetFallback(SHGEMVTKERNEL ../generic/gemv_t.c)
231239
SetFallback(SHGERKERNEL ../generic/ger.c)
232240
endif ()
233241
endmacro ()
@@ -260,5 +268,16 @@ if (BUILD_BFLOAT16)
260268
SetFallback(SBGEMMONCOPYOBJ sbgemm_oncopy.o)
261269
SetFallback(SBGEMMOTCOPYOBJ sbgemm_otcopy.o)
262270
endif ()
263-
271+
if (BUILD_HFLOAT16)
272+
SetFallback(SHGEMMKERNEL ../generic/gemmkernel_2x2.c)
273+
SetFallback(SHGEMM_BETA ../generic/gemm_beta.c)
274+
SetFallback(SHGEMMINCOPY ../generic/gemm_ncopy_2.c)
275+
SetFallback(SHGEMMITCOPY ../generic/gemm_tcopy_2.c)
276+
SetFallback(SHGEMMONCOPY ../generic/gemm_ncopy_2.c)
277+
SetFallback(SHGEMMOTCOPY ../generic/gemm_tcopy_2.c)
278+
SetFallback(SHGEMMINCOPYOBJ shgemm_incopy.o)
279+
SetFallback(SHGEMMITCOPYOBJ shgemm_itcopy.o)
280+
SetFallback(SHGEMMONCOPYOBJ shgemm_oncopy.o)
281+
SetFallback(SHGEMMOTCOPYOBJ shgemm_otcopy.o)
282+
endif ()
264283
endmacro ()

cmake/system.cmake

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,28 @@ if (${TARGET} STREQUAL NEOVERSEV1)
381381
endif()
382382
endif()
383383

384+
if ((${TARGET} STREQUAL RISCV64_ZVL128B) OR (${TARGET} STREQUAL RISCV64_ZVL256B))
385+
set (RISCV64_OPT "rv64imafdcv")
386+
if (BUILD_BFLOAT16)
387+
set (RISCV64_OPT "${RISCV64_OPT}_zvfbfwma")
388+
endif()
389+
if (BUILD_HFLOAT16)
390+
set (RISCV64_OPT "${RISCV64_OPT}_zvfh_zfh")
391+
endif()
392+
if (${TARGET} STREQUAL RISCV64_ZVL256B)
393+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=${RISCV64_OPT}_zvl256b -mabi=lp64d")
394+
endif()
395+
if (${TARGET} STREQUAL RISCV64_ZVL128B)
396+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=${RISCV64_OPT}_zvl128b -mabi=lp64d")
397+
endif()
398+
endif()
399+
if (${TARGET} STREQUAL RISCV64_GENERIC)
400+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=rv64imafdc -mabi=lp64d")
401+
endif()
402+
if (${TARGET} STREQUAL x280)
403+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=rv64imafdcv_zba_zbb_zfh_zvl512b -mabi=lp64d")
404+
endif()
405+
384406
endif()
385407

386408
if (DEFINED BINARY)

cmake/utils.cmake

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -375,9 +375,12 @@ function(GenerateNamedObjects sources_in)
375375
if (NOT no_float_type)
376376
string(SUBSTRING ${float_type} 0 1 float_char)
377377
string(TOLOWER ${float_char} float_char)
378-
if (${float_type} STREQUAL "BFLOAT16" AND NOT "${defines_in}" MATCHES "BGEM")
379-
set (float_char "sb")
380-
endif ()
378+
if (${float_type} STREQUAL "BFLOAT16" AND NOT "${defines_in}" MATCHES "BGEM")
379+
set (float_char "sb")
380+
endif ()
381+
if (${float_type} STREQUAL "HFLOAT16" AND NOT "${defines_in}" MATCHES "HGEM")
382+
set (float_char "sh")
383+
endif ()
381384
endif ()
382385

383386
if (NOT name_in)

common_interface.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,8 @@ void BLASFUNC(bgemv)(char *, blasint *, blasint *, bfloat16 *, bfloat16 *, blas
261261
bfloat16 *, blasint *, bfloat16 *, bfloat16 *, blasint *);
262262
void BLASFUNC(sbgemv)(char *, blasint *, blasint *, float *, bfloat16 *, blasint *,
263263
bfloat16 *, blasint *, float *, float *, blasint *);
264+
void BLASFUNC(shgemv)(char *, blasint *, blasint *, float *, hfloat16 *, blasint *,
265+
hfloat16 *, blasint *, float *, float *, blasint *);
264266
void BLASFUNC(sgemv)(char *, blasint *, blasint *, float *, float *, blasint *,
265267
float *, blasint *, float *, float *, blasint *);
266268
void BLASFUNC(dgemv)(char *, blasint *, blasint *, double *, double *, blasint *,

common_level2.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ int sbgemv_n(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLO
5454
int sbgemv_t(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
5555
int sbgemv_thread_n(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG, int);
5656
int sbgemv_thread_t(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG, int);
57+
int shgemv_n(BLASLONG, BLASLONG, float, hfloat16 *, BLASLONG, hfloat16 *, BLASLONG, float, float *, BLASLONG);
58+
int shgemv_t(BLASLONG, BLASLONG, float, hfloat16 *, BLASLONG, hfloat16 *, BLASLONG, float, float *, BLASLONG);
59+
int shgemv_thread_n(BLASLONG, BLASLONG, float, hfloat16 *, BLASLONG, hfloat16 *, BLASLONG, float, float *, BLASLONG, int);
60+
int shgemv_thread_t(BLASLONG, BLASLONG, float, hfloat16 *, BLASLONG, hfloat16 *, BLASLONG, float, float *, BLASLONG, int);
5761
int sger_k (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
5862
int dger_k (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
5963
int qger_k (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);

0 commit comments

Comments
 (0)